FLAMEGPU · ptheywood · Nov 18, 2025 · Nov 13, 2025
diff --git a/cmake/CUDAArchitectures.cmake b/cmake/CUDAArchitectures.cmake
@@ -96,15 +96,15 @@ function(flamegpu_visualiser_set_cuda_architectures)
     # Query NVCC for the acceptable SM values, this is used in multiple places
     if(NOT DEFINED SUPPORTED_CUDA_ARCHITECTURES_NVCC)
         execute_process(COMMAND ${CMAKE_CUDA_COMPILER} "--help" OUTPUT_VARIABLE NVCC_HELP_STR ERROR_VARIABLE NVCC_HELP_STR)
-        # Match all comptue_XX or sm_XXs
-        string(REGEX MATCHALL "'(sm|compute)_[0-9]+'" SUPPORTED_CUDA_ARCHITECTURES_NVCC "${NVCC_HELP_STR}" )
-        # Strip just the numeric component
-        string(REGEX REPLACE "'(sm|compute)_([0-9]+)'" "\\2" SUPPORTED_CUDA_ARCHITECTURES_NVCC "${SUPPORTED_CUDA_ARCHITECTURES_NVCC}" )
+        # Match all comptue_XX or sm_XX or lto_XX values, including single lowercase letter suffixes (a or f only supported so far, but allowing flexibility)
+        string(REGEX MATCHALL "'(sm|compute|lto)_([0-9]+[a-z]?)'" SUPPORTED_CUDA_ARCHITECTURES_NVCC "${NVCC_HELP_STR}" )
+        # Strip out just the portiaon after the _, which is the value for CMAKE_CUDA_ARCHITECTURES
+        string(REGEX REPLACE "'(sm|compute|lto)_([0-9]+[a-z]?)'" "\\2" SUPPORTED_CUDA_ARCHITECTURES_NVCC "${SUPPORTED_CUDA_ARCHITECTURES_NVCC}" )
         # Remove dupes and sort to build the correct list of supported CUDA_ARCH.
         list(REMOVE_DUPLICATES SUPPORTED_CUDA_ARCHITECTURES_NVCC)
         list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES_NVCC "")
         list(SORT SUPPORTED_CUDA_ARCHITECTURES_NVCC)
-        # Store the supported arch's once and only once. This could be a cache  var given the cuda compiler should not be able to change without clearing th cache?
+        # Store the supported arch's once and only once in the parent scope.
         set(SUPPORTED_CUDA_ARCHITECTURES_NVCC ${SUPPORTED_CUDA_ARCHITECTURES_NVCC} PARENT_SCOPE)
     endif()
     list(LENGTH SUPPORTED_CUDA_ARCHITECTURES_NVCC SUPPORTED_CUDA_ARCHITECTURES_NVCC_COUNT)
@@ -166,134 +166,41 @@ function(flamegpu_visualiser_set_cuda_architectures)
             set(using_keyword_arch TRUE)
         endif()
         # Cmake 3.18+ expects a list of 1 or more <sm>, <sm>-real or <sm>-virtual.
-        # CMake isn't aware of the exact SMS supported by the CUDA version afiak, but we have already queired nvcc for this (once and only once)
-        # If nvcc parsing worked and a single keyword option is not being used, attempt the validation:
-        if(SUPPORTED_CUDA_ARCHITECTURES_NVCC_COUNT GREATER 0 AND NOT using_keyword_arch)
-            # Transform a copy of the list of supported architectures, to hopefully just contain numbers
+        # CMake isn't aware of the exact SMS supported by the CUDA version afiak, but we have already queried nvcc for this (once and only once)
+        # If nvcc parsing worked and a single keyword option is not being used, attempt the validation
+        # But do not emit this more than once per invocation of CMake. parent scope wasn't enough in this case so using a global property
+        get_property(
+            WARNING_EMITTED
+            GLOBAL PROPERTY
+            __flamegpu_visualiser_set_cuda_architectures_warning_emitted
+        )
+        if(SUPPORTED_CUDA_ARCHITECTURES_NVCC_COUNT GREATER 0 AND NOT using_keyword_arch AND NOT WARNING_EMITTED)
+            # Transform a copy of the list of requested architectures, removing -real and -virutal CMake components
             set(archs ${CMAKE_CUDA_ARCHITECTURES})
             list(TRANSFORM archs REPLACE "(\-real|\-virtual)" "")
-            # If any of the specified architectures are not in the nvcc reported list, error.
+            # If any of the specified architectures are not in the nvcc reported list, raise a warning
             foreach(ARCH IN LISTS archs)
                 if(NOT ARCH IN_LIST SUPPORTED_CUDA_ARCHITECTURES_NVCC)
-                    message(FATAL_ERROR
-                        " CMAKE_CUDA_ARCHITECTURES value `${ARCH}` is not supported by nvcc ${CMAKE_CUDA_COMPILER_VERSION}.\n"
+                    message(WARNING
+                        " CMAKE_CUDA_ARCHITECTURES value `${ARCH}` may not be supported by nvcc ${CMAKE_CUDA_COMPILER_VERSION}, compilation may fail\n"
                         " Supported architectures based on nvcc --help: \n"
                         "   ${SUPPORTED_CUDA_ARCHITECTURES_NVCC}\n")
                 endif()
             endforeach()
-            unset(archs)
+            # set the global property so this is not re-emitted for each project()
+            set_property(
+                GLOBAL PROPERTY
+                __flamegpu_visualiser_set_cuda_architectures_warning_emitted
+                TRUE
+            )
         endif()
     else()
         # Otherwise, set a mulit-arch default for good compatibility and performacne
-        # If we're using CMake >= 3.23, we can just use all-major, though we then have to find the minimum a different way?
-        if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.23")
-            set(CMAKE_CUDA_ARCHITECTURES "all-major")
-        else()
-            # For CMake < 3.23, we have to make our own all-major equivalent.
-            # If we have nvcc help outut, we can generate this from all the elements that end with a 0 (and the first element if it does not.)
-            if(SUPPORTED_CUDA_ARCHITECTURES_NVCC_COUNT GREATER 0)
-                # If the lowest support arch is not major, add it to the default
-                list(GET SUPPORTED_CUDA_ARCHITECTURES_NVCC 0 lowest_supported)
-                if(NOT lowest_supported MATCHES "0$")
-                    list(APPEND default_archs ${lowest_supported})
-                endif()
-                unset(lowest_supported)
-                # For each architecture, if it is major add it to the default list
-                foreach(ARCH IN LISTS SUPPORTED_CUDA_ARCHITECTURES_NVCC)
-                    if(ARCH MATCHES "0$")
-                        list(APPEND default_archs ${ARCH})
-                    endif()
-                endforeach()
-            else()
-                # If nvcc help output parsing failed, just use an informed guess option from CUDA 11.8
-                set(default_archs "35;50;60;70;80")
-                if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.8)
-                    list(APPEND default_archs "90")
-                endif()
-                message(AUTHOR_WARNING
-                    "  ${CMAKE_CURRENT_FUNCTION} failed to parse NVCC --help output for default architecture generation\n"
-                    "  Using ${default_archs} based on CUDA 11.0 to 11.8."
-                )
-            endif()
-            # We actually want real for each arch, then virtual for the final, but only for library-provided values, to only embed one arch worth of ptx.
-            # So grab the last element of the list
-            list(GET default_archs -1 final)
-            # append -real to each element, to not embed ptx for that arch too
-            list(TRANSFORM default_archs APPEND "-real")
-            # add the -virtual version of the final element
-            list(APPEND default_archs "${final}-virtual")
-            # Set the value
-            set(CMAKE_CUDA_ARCHITECTURES ${default_archs})
-            #unset local vars
-            unset(default_archs)
-        endif()
+        # As we require CMake >= 3.23 (and CUDA >= 12 which includes all-major), we can just use all-major
+        set(CMAKE_CUDA_ARCHITECTURES "all-major")
     endif()
     # Promote the value to the parent's scope, where it is needed on the first invokation (might be fine with cache, but just incase)
     set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE)
     # Promote the value to the cache for reconfigure persistence, as the enable_language sets it on the cache
     set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" CACHE STRING "CUDA architectures" FORCE)
 endfunction()
-
-#[[[
-# Get the minimum CUDA Architecture from the current CMAKE_CUDA_ARCHITECTURES value if possible
-#
-# Gets the minimum CUDA architectuyre from teh current value of CMAKE_CUDA_ARCHITECTURES if possible, storing the result in the pass-by-reference return value
-# Supports CMAKE_CUDA_ARCHITECTURE values including integers, -real post-fixed integers, -virtual post-fixed integers, all-major and all.
-# Does not support native, instead returning -1.
-# all or all-major are supported by querying nvcc --help to detect the minimum built for.
-#
-# CUDA must be enabled as a language prior to this method being called, and CMAKE_CUDA_ARCHITECTURES must be defined and non-empty
-#
-# :param minimum_architecture: the minimum architecture set in CMAKE_CUDA_ARCHITECTURES
-# :type NO_VALIDATE_ARCHITECTURES: integer
-#]]
-function(flamegpu_visualiser_get_minimum_cuda_architecture minimum_architecture)
-    if(DEFINED CMAKE_CUDA_ARCHITECTURES)
-        # Cannot deal with native gracefully
-        if("native" IN_LIST CMAKE_CUDA_ARCHITECTURES)
-            # If it's native, we would need to exeucte some CUDA code to detect this. For now set -1.
-            set(flamegpu_visualiser_minimum_cuda_architecture -1)
-        # if all/all-major is specified, detect via nvcc --help. It must be the only option (CMake doens't validate this and generates bad gencodes otherwise)
-        elseif("all-major" IN_LIST CMAKE_CUDA_ARCHITECTURES OR "all" IN_LIST CMAKE_CUDA_ARCHITECTURES)
-            # Query NVCC for the acceptable SM values.
-            if(NOT DEFINED SUPPORTED_CUDA_ARCHITECTURES_NVCC)
-                execute_process(COMMAND ${CMAKE_CUDA_COMPILER} "--help" OUTPUT_VARIABLE NVCC_HELP_STR ERROR_VARIABLE NVCC_HELP_STR)
-                # Match all comptue_XX or sm_XXs
-                string(REGEX MATCHALL "'(sm|compute)_[0-9]+'" SUPPORTED_CUDA_ARCHITECTURES_NVCC "${NVCC_HELP_STR}" )
-                # Strip just the numeric component
-                string(REGEX REPLACE "'(sm|compute)_([0-9]+)'" "\\2" SUPPORTED_CUDA_ARCHITECTURES_NVCC "${SUPPORTED_CUDA_ARCHITECTURES_NVCC}" )
-                # Remove dupes and sort to build the correct list of supported CUDA_ARCH.
-                list(REMOVE_DUPLICATES SUPPORTED_CUDA_ARCHITECTURES_NVCC)
-                list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES_NVCC "")
-                list(SORT SUPPORTED_CUDA_ARCHITECTURES_NVCC)
-                # Store the supported arch's once and only once. This could be a cache  var given the cuda compiler should not be able to change without clearing th cache?
-                set(SUPPORTED_CUDA_ARCHITECTURES_NVCC ${SUPPORTED_CUDA_ARCHITECTURES_NVCC} PARENT_SCOPE)
-            endif()
-            list(LENGTH SUPPORTED_CUDA_ARCHITECTURES_NVCC SUPPORTED_CUDA_ARCHITECTURES_NVCC_COUNT)
-            if(SUPPORTED_CUDA_ARCHITECTURES_NVCC_COUNT GREATER 0)
-                # For both all and all-major, the lowest arch should be the lowest supported. This is true for CUDA <= 11.8 atleast.
-                list(GET SUPPORTED_CUDA_ARCHITECTURES_NVCC 0 lowest)
-                set(flamegpu_visualiser_minimum_cuda_architecture ${lowest})
-            else()
-                # If nvcc didn't give anything useful, set -1.
-                set(flamegpu_visualiser_minimum_cuda_architecture -1)
-            endif()
-        else()
-            # Otherwise it should just be a list of one or more <sm>/<sm>-real/<sm-virtual>
-            # Copy the list
-            set(archs ${CMAKE_CUDA_ARCHITECTURES})
-            # Replace occurances of -real and -virtual
-            list(TRANSFORM archs REPLACE "(\-real|\-virtual)" "")
-            # Sort the list numerically (natural option
-            list(SORT archs COMPARE NATURAL ORDER ASCENDING)
-            # Get the first element
-            list(GET archs 0 lowest)
-            # Set the value for later returning
-            set(flamegpu_visualiser_minimum_cuda_architecture ${lowest})
-        endif()
-        # Set the return value as required, effectively pass by reference.
-        set(${minimum_architecture} ${flamegpu_visualiser_minimum_cuda_architecture} PARENT_SCOPE)
-    else()
-        message(FATAL_ERROR "${CMAKE_CURRENT_FUNCTION}: CMAKE_CUDA_ARCHITECTURES is not set or is empty")
-    endif()
-endfunction()