File tree: 3 files changed, +6 -4 lines changed
3 files changed, +6 -4 lines changed
Original file line number | Diff line number | Diff line change
@@ -139,10 +139,10 @@ if (LLAMA_CUBLAS)
139139 elseif (CUDAToolkit_VERSION VERSION_GREATER 12)
140140 add_compile_definitions (GGML_CUDA_USE_GRAPHS) #try enable cuda graphs on cu12 build
141141 add_compile_definitions (KCPP_LIMIT_CUDA_MAX_ARCH=800)
142- set (CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual; 75-virtual;80-virtual" ) # lowest CUDA 12 standard + lowest for integer intrinsics
142+ set (CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;75-virtual;80-virtual" ) # lowest CUDA 12 standard + lowest for integer intrinsics
143143 else ()
144144 add_compile_definitions (KCPP_LIMIT_CUDA_MAX_ARCH=750) #will cause issues with ggml_cuda_highest_compiled_arch if removed
145- set (CMAKE_CUDA_ARCHITECTURES "35-virtual;50-virtual;61-virtual;70-virtual; 75-virtual" ) # lowest CUDA 12 standard + lowest for integer intrinsics
145+ set (CMAKE_CUDA_ARCHITECTURES "35-virtual;50-virtual;61-virtual;75-virtual" ) # lowest CUDA 12 standard + lowest for integer intrinsics
146146 endif ()
147147 endif ()
148148 message (STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES} " )
Original file line number | Diff line number | Diff line change
@@ -226,15 +226,13 @@ NVCCFLAGS += -Wno-deprecated-gpu-targets \
226226 -gencode arch=compute_35,code=compute_35 \
227227 -gencode arch=compute_50,code=compute_50 \
228228 -gencode arch=compute_61,code=compute_61 \
229- -gencode arch=compute_70,code=compute_70 \
230229 -gencode arch=compute_75,code=compute_75 \
231230 -DKCPP_LIMIT_CUDA_MAX_ARCH=750
232231
233232else ifdef LLAMA_ARCHES_CU12
234233NVCCFLAGS += -Wno-deprecated-gpu-targets \
235234 -gencode arch=compute_50,code=compute_50 \
236235 -gencode arch=compute_61,code=compute_61 \
237- -gencode arch=compute_70,code=compute_70 \
238236 -gencode arch=compute_75,code=compute_75 \
239237 -gencode arch=compute_80,code=compute_80 \
240238 -DKCPP_LIMIT_CUDA_MAX_ARCH=800
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,10 @@ static int ggml_cuda_highest_compiled_arch(const int arch) {
136136#ifndef KCPP_LIMIT_CUDA_MAX_ARCH
137137 return arch;
138138#else
139+ if (arch==GGML_CUDA_CC_VOLTA) // fix for kcpp, if volta try return 610 instead
140+ {
141+ return GGML_CUDA_CC_DP4A;
142+ }
139143 return (arch > KCPP_LIMIT_CUDA_MAX_ARCH ? KCPP_LIMIT_CUDA_MAX_ARCH : arch);
140144#endif
141145}
You can’t perform that action at this time.
0 commit comments