Skip to content

Commit 8f647b7

Browse files
committed
another attempt to tip the scales (volta)
1 parent 40d3d83 commit 8f647b7

File tree

3 files changed

+6
-4
lines changed

3 files changed

+6
-4
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -139,10 +139,10 @@ if (LLAMA_CUBLAS)
139139
elseif(CUDAToolkit_VERSION VERSION_GREATER 12)
140140
add_compile_definitions(GGML_CUDA_USE_GRAPHS) #try enable cuda graphs on cu12 build
141141
add_compile_definitions(KCPP_LIMIT_CUDA_MAX_ARCH=800)
142-
set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual") # lowest CUDA 12 standard + lowest for integer intrinsics
142+
set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;75-virtual;80-virtual") # lowest CUDA 12 standard + lowest for integer intrinsics
143143
else()
144144
add_compile_definitions(KCPP_LIMIT_CUDA_MAX_ARCH=750) #will cause issues with ggml_cuda_highest_compiled_arch if removed
145-
set(CMAKE_CUDA_ARCHITECTURES "35-virtual;50-virtual;61-virtual;70-virtual;75-virtual") # lowest CUDA 12 standard + lowest for integer intrinsics
145+
set(CMAKE_CUDA_ARCHITECTURES "35-virtual;50-virtual;61-virtual;75-virtual") # lowest CUDA 12 standard + lowest for integer intrinsics
146146
endif()
147147
endif()
148148
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")

Makefile

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -226,15 +226,13 @@ NVCCFLAGS += -Wno-deprecated-gpu-targets \
226226
-gencode arch=compute_35,code=compute_35 \
227227
-gencode arch=compute_50,code=compute_50 \
228228
-gencode arch=compute_61,code=compute_61 \
229-
-gencode arch=compute_70,code=compute_70 \
230229
-gencode arch=compute_75,code=compute_75 \
231230
-DKCPP_LIMIT_CUDA_MAX_ARCH=750
232231

233232
else ifdef LLAMA_ARCHES_CU12
234233
NVCCFLAGS += -Wno-deprecated-gpu-targets \
235234
-gencode arch=compute_50,code=compute_50 \
236235
-gencode arch=compute_61,code=compute_61 \
237-
-gencode arch=compute_70,code=compute_70 \
238236
-gencode arch=compute_75,code=compute_75 \
239237
-gencode arch=compute_80,code=compute_80 \
240238
-DKCPP_LIMIT_CUDA_MAX_ARCH=800

ggml/src/ggml-cuda/common.cuh

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,10 @@ static int ggml_cuda_highest_compiled_arch(const int arch) {
136136
#ifndef KCPP_LIMIT_CUDA_MAX_ARCH
137137
return arch;
138138
#else
139+
if(arch==GGML_CUDA_CC_VOLTA) //fix for kcpp, if volta try return 610 instead
140+
{
141+
return GGML_CUDA_CC_DP4A;
142+
}
139143
return (arch > KCPP_LIMIT_CUDA_MAX_ARCH ? KCPP_LIMIT_CUDA_MAX_ARCH : arch);
140144
#endif
141145
}

0 commit comments

Comments
 (0)