Skip to content

Commit 37b572e

Browse files
committed
fixed hardcoded qk=128 bug
1 parent 23649e5 commit 37b572e

File tree

3 files changed

+14
-9
lines changed

3 files changed

+14
-9
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,5 @@ models/**
66
.vscode/**
77

88
**/__pycache__/**
9+
10+
**/build-info.cpp

ggml/src/CMakeLists.txt

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
11
include(CheckCXXCompilerFlag)
22

3-
# Set default value for QK4_0
4-
set(QK4_0 "32" CACHE STRING "Quantization block size for Q4_0 (32, 64, 128, 256)")
5-
6-
# Only check the value if it was explicitly set by the user
7-
if (DEFINED QK4_0 AND NOT QK4_0 STREQUAL "32")
3+
# Only process QK4_0 if it is explicitly set
4+
if (DEFINED QK4_0)
5+
# Ensure QK4_0 is an integer and within the allowed values
86
if (NOT QK4_0 MATCHES "^(32|64|128|256)$")
97
message(FATAL_ERROR "Invalid QK4_0 value: Must be one of {32, 64, 128, 256}")
108
endif()
9+
10+
# Define the macro for compilation
1111
add_compile_definitions(QK4_0=${QK4_0})
12-
endif()
1312

14-
message(STATUS "QK4_0 is set to: ${QK4_0}")
13+
message(STATUS "QK4_0 is set to: ${QK4_0}")
14+
else()
15+
message(STATUS "QK4_0 is not set. Using default behavior.")
16+
endif()
1517

1618
add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})
1719

ggml/src/ggml-cpu/ggml-cpu-quants.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1741,7 +1741,7 @@ static inline __m128i get_scale_shuffle(int i) {
17411741
#endif
17421742

17431743
void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
1744-
const int qk = 128;
1744+
const int qk = QK4_0;
17451745
const int nb = n / qk;
17461746

17471747
assert(n % qk == 0);
@@ -2317,7 +2317,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
23172317
}
23182318

23192319
sumf = hsum_float_4x4(acc_0, acc_1, acc_2, acc_3);
2320-
#endif
2320+
#else
23212321
for (; ib < nb; ++ib) {
23222322
int sumi0 = 0;
23232323
int sumi1 = 0;
@@ -2333,6 +2333,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
23332333
int sumi = sumi0 + sumi1;
23342334
sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d);
23352335
}
2336+
#endif
23362337
#endif
23372338
*s = sumf;
23382339
}

0 commit comments

Comments
 (0)