@@ -15,6 +15,7 @@ if (CUDAToolkit_FOUND)
1515 # 80 == Ampere, asynchronous data loading, faster tensor core instructions
1616 # 86 == RTX 3000, needs CUDA v11.1
1717 # 89 == RTX 4000, needs CUDA v11.8
18+ # 120 == RTX 5000, needs CUDA v12.8
1819 #
1920 # XX-virtual == compile CUDA code as PTX, do JIT compilation to binary code on first run
2021 # XX-real == compile CUDA code as device code for this specific architecture
@@ -23,19 +24,33 @@ if (CUDAToolkit_FOUND)
2324 # The default behavior for a non-native build is to compile virtual architectures as needed to cover all required features
2425 # for best performance, and to also compile real architectures for the most commonly used GPUs.
2526 if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6" AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.24" )
27+ # Native build: target only the GPUs available on this system (branch taken only with CUDA toolkit >= 11.6 and CMake >= 3.24, per the condition above)
2628 set (CMAKE_CUDA_ARCHITECTURES "native" )
27- elseif (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
28- if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8" )
29- set (CMAKE_CUDA_ARCHITECTURES "60-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real;89-real" )
29+ else ()
30+ set (ARCH_LIST "" )
31+
32+ # Base architecture - the first (lowest) list entry, selected by the FP16 feature flags
33+ if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
34+ # FP16 builds start at 60-virtual (Pascal and newer)
35+ list (APPEND ARCH_LIST "60-virtual" )
3036 else ()
31- set (CMAKE_CUDA_ARCHITECTURES "60-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real" )
37+ # All other builds start at 50-virtual (Maxwell and newer)
38+ list (APPEND ARCH_LIST "50-virtual" )
3239 endif ()
33- else ()
40+
41+ # Always appended after the base architecture. NOTE(review): "86-real" is added without a version check, so this assumes CUDA toolkit version 11.1 or higher - confirm that older toolkits are unsupported
42+ list (APPEND ARCH_LIST "61-virtual" "70-virtual" "75-virtual" "80-virtual" "86-real" )
43+
44+ # Version-gated architectures for newer GPUs: each is appended only when the toolkit version check below passes
3445 if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8" )
35- set (CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real;89-real" )
36- else ()
37- set (CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real" )
46+ list (APPEND ARCH_LIST "89-real" )
47+ endif ()
48+
49+ if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8" )
50+ list (APPEND ARCH_LIST "120-real" )
3851 endif ()
52+
53+ set (CMAKE_CUDA_ARCHITECTURES ${ARCH_LIST} )
3954 endif ()
4055 endif ()
4156 message (STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES} " )
0 commit comments