@@ -201,7 +201,7 @@ else()
201201 FetchContent_MakeAvailable(libtorch)
202202
203203 find_package (Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH)
204- message (STATUS "Downloading and using libtorch 2.7.1 for cuda ${CUDA_VERSION } at ${libtorch_SOURCE_DIR} " )
204+ message (STATUS "Downloading and using libtorch 2.7.1 for cuda ${CUDAToolkit_VERSION} at ${libtorch_SOURCE_DIR} " )  # no whitespace is allowed inside ${...}
205205endif ()
206206
207207# carry over torch flags to the rest of the project
@@ -214,14 +214,21 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DC10_USE_GLOG")
214214message (STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS} " )
215215message (STATUS "CMAKE_CXX_FLAGS_DEBUG: ${CMAKE_CXX_FLAGS_DEBUG} " )
216216
217- set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -O3)
218217# The following definitions must be undefined since half-precision operation is required.
219- set (CUDA_NVCC_FLAGS ${ CUDA_NVCC_FLAGS}
218+ list ( APPEND CUDA_NVCC_FLAGS
220219 -U__CUDA_NO_HALF_OPERATORS__
221220 -U__CUDA_NO_HALF_CONVERSIONS__
222221 -U__CUDA_NO_HALF2_OPERATORS__
223- -U__CUDA_NO_BFLOAT16_CONVERSIONS__)
224- set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --use_fast_math -Xfatbin -compress-all )
222+ -U__CUDA_NO_BFLOAT16_CONVERSIONS__
223+ -O3
224+ --use_fast_math
225+ -Xfatbin
226+ -compress-all )
227+
228+ # Enable aggressive fatbin compression for CUDA 12.8 or later.
229+ if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.8)  # bare name: if() dereferences it, and an unset variable compares false instead of breaking the expression
230+ list (APPEND CUDA_NVCC_FLAGS -compress-mode=size)  # appended only when the version check above passes
231+ endif ()
225232message (STATUS "CUDA_NVCC_FLAGS: ${CUDA_NVCC_FLAGS} " )
226233
227234# enable testing in this directory so we can do a top-level `make test`.
0 commit comments