Skip to content

Commit 61c1ce9

Browse files
committed
Adapt LCPP refactor merge and reinstate Q6_0
And update stable-diffusion.h
1 parent 1851b0b commit 61c1ce9

File tree

8 files changed

+94
-67
lines changed

8 files changed

+94
-67
lines changed

CMakeLists.txt

Lines changed: 63 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ set(LLAMA_SCHED_MAX_COPIES "1" CACHE STRING "llama: max input copies for pipeli
6969
option(LLAMA_CUDA_ENABLE_UNIFIED_MEMORY "llama: enable to avoid OOM in Full Offload" OFF)
7070

7171
option(GGML_IQK_MUL_MAT "ggml: use optimized iqk matrix multiplications" OFF)
72-
option(GGML_USE_LLAMA_CPP_MAINLINE "ggml: use Llama CPP mainline MatMul " ON)
72+
option(GGML_USE_LLAMA_CPP_MAINLINE "ggml: use Llama CPP mainline MatMul " OFF)
7373

7474
#
7575
# Compile flags
@@ -85,7 +85,7 @@ find_package(Threads REQUIRED)
8585
add_compile_definitions(LOG_DISABLE_LOGS)
8686

8787
file(GLOB GGML_SOURCES_CUDA "ggml/src/ggml-cuda/*.cu")
88-
list(APPEND GGML_SOURCES_CUDA "ggml/src/ggml-cuda.cu")
88+
list(APPEND GGML_SOURCES_CUDA "ggml/src/ggml-cuda/ggml-cuda.cu")
8989
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu")
9090
list(APPEND GGML_SOURCES_CUDA ${SRCS})
9191
set(GGML_V3_CUDA_SOURCES otherarch/ggml_v3-cuda.cu otherarch/ggml_v3-cuda.h)
@@ -329,7 +329,7 @@ if (LLAMA_HIPBLAS)
329329
if (${hipblas_FOUND} AND ${hip_FOUND})
330330
message(STATUS "HIP and hipBLAS found")
331331
file(GLOB GGML_SOURCES_ROCM "ggml/src/ggml-cuda/*.cu")
332-
list(APPEND GGML_SOURCES_ROCM "ggml/src/ggml-cuda.cu")
332+
list(APPEND GGML_SOURCES_ROCM "ggml/src/ggml-cuda/ggml-cuda.cu")
333333
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu")
334334
list(APPEND GGML_SOURCES_ROCM ${SRCS})
335335
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
@@ -686,65 +686,65 @@ if (GGML_IQK_MUL_MAT)
686686
set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
687687
add_compile_definitions(GGML_USE_IQK_MULMAT)
688688
elseif (GGML_USE_LLAMA_CPP_MAINLINE)
689-
add_library(ggml
690-
ggml/src/ggml.c
691-
ggml/include/ggml.h
692-
ggml/src/ggml-cpu.c
693-
ggml/include/ggml-cpu.h
694-
ggml/src/ggml-alloc.c
695-
ggml/include/ggml-alloc.h
696-
ggml/src/ggml-backend.cpp
697-
ggml/src/ggml-backend-impl.h
698-
ggml/include/ggml-backend.h
699-
ggml/include/ggml-cpp.h
700-
ggml/src/ggml-quants.c
701-
ggml/src/ggml-quants.h
702-
ggml/src/llamafile/sgemm.cpp
703-
ggml/src/llamafile/sgemm.h
704-
ggml/src/ggml-aarch64.c
705-
ggml/src/ggml-aarch64.h
706-
${GGML_SOURCES_CUDA})
707-
target_include_directories(ggml PUBLIC . ./ggml/include ./ggml/src ./include ./otherarch ./otherarch/tools)
708-
target_compile_features(ggml PUBLIC c_std_11) # don't bump
709-
target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
710-
set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
689+
# add_library(ggml
690+
# ggml/src/ggml.c
691+
# ggml/include/ggml.h
692+
# ggml/src/ggml-cpu.c
693+
# ggml/include/ggml-cpu.h
694+
# ggml/src/ggml-alloc.c
695+
# ggml/include/ggml-alloc.h
696+
# ggml/src/ggml-backend.cpp
697+
# ggml/src/ggml-backend-impl.h
698+
# ggml/include/ggml-backend.h
699+
# ggml/include/ggml-cpp.h
700+
# ggml/src/ggml-quants.c
701+
# ggml/src/ggml-quants.h
702+
# ggml/src/llamafile/sgemm.cpp
703+
# ggml/src/llamafile/sgemm.h
704+
# ggml/src/ggml-aarch64.c
705+
# ggml/src/ggml-aarch64.h
706+
# ${GGML_SOURCES_CUDA})
707+
# target_include_directories(ggml PUBLIC . ./ggml/include ./ggml/src ./include ./otherarch ./otherarch/tools)
708+
# target_compile_features(ggml PUBLIC c_std_11) # don't bump
709+
# target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
710+
# set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
711711
add_compile_definitions(GGML_USE_LLAMA_CPP_MAINLINE)
712712
else ()
713-
add_library(ggml
714-
ggml/src/ggml.c
715-
ggml/include/ggml.h
716-
ggml/src/ggml-cpu/ggml-cpu.c
717-
ggml/include/ggml-cpu.h
718-
ggml/src/ggml-alloc.c
719-
ggml/include/ggml-alloc.h
720-
ggml/src/ggml-backend.cpp
721-
ggml/src/ggml-backend-impl.h
722-
ggml/include/ggml-backend.h
723-
ggml/include/ggml-cpp.h
724-
ggml/src/ggml-quants.c
725-
ggml/src/ggml-quants.h
726-
ggml/src/ggml-cpu/llamafile/sgemm.cpp
727-
ggml/src/ggml-cpu/llamafile/sgemm.h
728-
ggml/src/ggml-aarch64.c
729-
ggml/src/ggml-aarch64.h
730-
ggml/src/ggml-threading.cpp
731-
ggml/src/ggml-cpu/ggml-cpu.cpp
732-
ggml/src/ggml-cpu/ggml-cpu-aarch64.c
733-
ggml/src/ggml-cpu/ggml-cpu-aarch64.h
734-
ggml/src/ggml-cpu/ggml-cpu-quants.c
735-
ggml/src/ggml-cpu/ggml-cpu-quants.h
736-
ggml/src/ggml-backend-reg.cpp
737-
${GGML_SOURCES_CUDA})
738-
target_include_directories(ggml PUBLIC . ./ggml/include ./ggml/src ./include ./otherarch ./otherarch/tools)
739-
target_compile_features(ggml PUBLIC c_std_11) # don't bump
740-
target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
741-
set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
713+
add_library(ggml
714+
ggml/src/ggml.c
715+
ggml/include/ggml.h
716+
ggml/src/ggml-cpu/ggml-cpu.c
717+
ggml/include/ggml-cpu.h
718+
ggml/src/ggml-alloc.c
719+
ggml/include/ggml-alloc.h
720+
ggml/src/ggml-backend.cpp
721+
ggml/src/ggml-backend-impl.h
722+
ggml/include/ggml-backend.h
723+
ggml/include/ggml-cpp.h
724+
ggml/src/ggml-quants.c
725+
ggml/src/ggml-quants.h
726+
ggml/src/ggml-cpu/llamafile/sgemm.cpp
727+
ggml/src/ggml-cpu/llamafile/sgemm.h
728+
ggml/src/ggml-aarch64.c
729+
ggml/src/ggml-aarch64.h
730+
ggml/src/ggml-threading.cpp
731+
ggml/src/ggml-cpu/ggml-cpu.cpp
732+
ggml/src/ggml-cpu/ggml-cpu-aarch64.c
733+
ggml/src/ggml-cpu/ggml-cpu-aarch64.h
734+
ggml/src/ggml-cpu/ggml-cpu-quants.c
735+
ggml/src/ggml-cpu/ggml-cpu-quants.h
736+
ggml/src/ggml-backend-reg.cpp
737+
${GGML_SOURCES_CUDA})
738+
target_include_directories(ggml PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools)
739+
target_compile_features(ggml PUBLIC c_std_11) # don't bump
740+
target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
741+
set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
742742
endif()
743743

744744
add_library(ggml_v1
745745
otherarch/ggml_v1.c
746746
otherarch/ggml_v1.h)
747-
target_include_directories(ggml_v1 PUBLIC . ./ggml/include ./ggml/src ./include ./otherarch ./otherarch/tools)
747+
target_include_directories(ggml_v1 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools)
748748
target_compile_features(ggml_v1 PUBLIC c_std_11) # don't bump
749749
target_link_libraries(ggml_v1 PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
750750
set_target_properties(ggml_v1 PROPERTIES POSITION_INDEPENDENT_CODE ON)
@@ -754,7 +754,7 @@ add_library(ggml_v2
754754
otherarch/ggml_v2.h
755755
${GGML_V2_CUDA_SOURCES}
756756
${GGML_V2_LEGACY_CUDA_SOURCES})
757-
target_include_directories(ggml_v2 PUBLIC . ./ggml/include ./ggml/src ./include ./otherarch ./otherarch/tools)
757+
target_include_directories(ggml_v2 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools)
758758
target_compile_features(ggml_v2 PUBLIC c_std_11) # don't bump
759759
target_link_libraries(ggml_v2 PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
760760
set_target_properties(ggml_v2 PROPERTIES POSITION_INDEPENDENT_CODE ON)
@@ -763,7 +763,7 @@ add_library(ggml_v3
763763
otherarch/ggml_v3.c
764764
otherarch/ggml_v3.h
765765
${GGML_V3_CUDA_SOURCES})
766-
target_include_directories(ggml_v3 PUBLIC . ./ggml/include ./ggml/src ./include ./otherarch ./otherarch/tools)
766+
target_include_directories(ggml_v3 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools)
767767
target_compile_features(ggml_v3 PUBLIC c_std_11) # don't bump
768768
target_link_libraries(ggml_v3 PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
769769
set_target_properties(ggml_v3 PROPERTIES POSITION_INDEPENDENT_CODE ON)
@@ -780,36 +780,36 @@ add_library(common2
780780
src/unicode.h
781781
src/unicode.cpp
782782
src/unicode-data.cpp)
783-
target_include_directories(common2 PUBLIC . ./ggml/include ./ggml/src ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
783+
target_include_directories(common2 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
784784
target_compile_features(common2 PUBLIC cxx_std_11) # don't bump
785785
target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
786786
set_target_properties(common2 PROPERTIES POSITION_INDEPENDENT_CODE ON)
787787

788788
add_library(sdtype_adapter
789789
otherarch/sdcpp/sdtype_adapter.cpp)
790-
target_include_directories(sdtype_adapter PUBLIC . ./ggml/include ./ggml/src ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
790+
target_include_directories(sdtype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
791791
target_compile_features(sdtype_adapter PUBLIC cxx_std_11) # don't bump
792792
target_link_libraries(sdtype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
793793
set_target_properties(sdtype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
794794

795795
add_library(whisper_adapter
796796
otherarch/whispercpp/whisper_adapter.cpp)
797-
target_include_directories(whisper_adapter PUBLIC . ./ggml/include ./ggml/src ./include ./otherarch ./otherarch/tools ./otherarch/whispercpp ./examples ./common)
797+
target_include_directories(whisper_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/whispercpp ./examples ./common)
798798
target_compile_features(whisper_adapter PUBLIC cxx_std_11) # don't bump
799799
target_link_libraries(whisper_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
800800
set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
801801

802802
add_library(gpttype_adapter
803803
gpttype_adapter.cpp)
804-
target_include_directories(gpttype_adapter PUBLIC . ./ggml/include ./ggml/src ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
804+
target_include_directories(gpttype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
805805
target_compile_features(gpttype_adapter PUBLIC cxx_std_11) # don't bump
806806
target_link_libraries(gpttype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
807807
set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
808808

809809
if (LLAMA_CUBLAS)
810810
set(TARGET koboldcpp_cublas)
811811
add_library(${TARGET} SHARED expose.cpp expose.h)
812-
target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
812+
target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
813813
target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
814814
set_target_properties(${TARGET} PROPERTIES PREFIX "")
815815
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
@@ -821,7 +821,7 @@ endif()
821821
if (LLAMA_HIPBLAS)
822822
set(TARGET koboldcpp_hipblas)
823823
add_library(${TARGET} SHARED expose.cpp expose.h)
824-
target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
824+
target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
825825
target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
826826
set_target_properties(${TARGET} PROPERTIES PREFIX "")
827827
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas")

ggml/src/ggml-cpu/ggml-cpu-quants.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,10 @@ void quantize_row_q5_1(const float * restrict x, void * restrict y, int64_t k) {
707707
quantize_row_q5_1_ref(x, y, k);
708708
}
709709

710+
void quantize_row_q6_0(const float * restrict x, void * restrict y, int64_t k) {
711+
quantize_row_q6_0_ref(x, y, k);
712+
}
713+
710714
void quantize_row_q8_0(const float * restrict x, void * restrict vy, int64_t k) {
711715
assert(QK8_0 == 32);
712716
assert(k % QK8_0 == 0);
@@ -3328,6 +3332,21 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
33283332
*s = sumf;
33293333
}
33303334

3335+
void ggml_vec_dot_q6_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
3336+
#if GGML_USE_IQK_MULMAT
3337+
#ifdef __AVX2__
3338+
const enum ggml_type vec_dot_type = GGML_TYPE_Q8_1;
3339+
#else
3340+
const enum ggml_type vec_dot_type = GGML_TYPE_Q8_0;
3341+
#endif
3342+
if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_Q6_0, vx, bx, vec_dot_type, vy, by, s, bs, 0, 1)) {
3343+
return;
3344+
}
3345+
#endif
3346+
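// TODO: implement a non-IQK Q6_0 x Q8_0 dot product; until then *s is forced to 0 whenever the IQK path is compiled out or iqk_mul_mat returns false.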
3347+
*s = 0;
3348+
}
3349+
33313350
void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
33323351
const int qk = QK8_0;
33333352
const int nb = n / qk;

ggml/src/ggml-cpu/ggml-cpu-quants.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ void quantize_row_q5_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
1818
void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
1919
void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
2020
void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
21+
void quantize_row_q6_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
2122

2223
void quantize_row_q2_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
2324
void quantize_row_q3_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
@@ -38,6 +39,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi
3839
void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
3940
void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
4041
void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
42+
void ggml_vec_dot_q6_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
4143

4244
void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
4345
void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
301301
.nrows = 1,
302302
},
303303
[GGML_TYPE_Q6_0] = {
304+
.from_float = quantize_row_q6_0,
304305
.vec_dot = ggml_vec_dot_q6_0_q8_0,
305306
.vec_dot_type = GGML_TYPE_Q8_0,
306307
.nrows = 1,

ggml/src/ggml-quants.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2130,6 +2130,10 @@ static void quantize_row_q6_0_impl(const float * restrict x, block_q6_0 * restri
21302130
}
21312131

21322132
size_t quantize_q6_0(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
2133+
if (!quant_weights) {
2134+
quantize_row_q6_0_ref(src, dst, (int64_t)nrow*n_per_row);
2135+
return nrow * ggml_row_size(GGML_TYPE_Q6_0, n_per_row);
2136+
}
21332137
size_t row_size = ggml_row_size(GGML_TYPE_Q6_0, n_per_row);
21342138
char * qrow = (char *)dst;
21352139
for (int64_t row = 0; row < nrow; ++row) {

ggml/src/ggml-quants.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ GGML_API void quantize_row_q4_K_ref(const float * GGML_RESTRICT x, block_q4_K *
3030
GGML_API void quantize_row_q5_K_ref(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t k);
3131
GGML_API void quantize_row_q6_K_ref(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t k);
3232
GGML_API void quantize_row_q8_K_ref(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int64_t k);
33-
GGML_API void quantize_row_q8_K64_ref(const float * GGML_RESTRICT x, block_q8_K64 * GGML_RESTRICT y, int64_t k);
33+
// GGML_API void quantize_row_q8_K64_ref(const float * GGML_RESTRICT x, block_q8_K64 * GGML_RESTRICT y, int64_t k);
3434

3535

3636

ggml/src/ggml.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@
2222
#include "ggml-backend.h"
2323
#include "ggml-impl.h"
2424
#include "ggml-cpu-impl.h"
25-
#include "ggml-quants.h"
25+
#include "ggml-threading.h"
2626
#include "ggml.h"
27+
#include "ggml-quants.h"
2728
#include "ggml-aarch64.h"
2829
#endif
2930

@@ -670,7 +671,6 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
670671
.type_size = sizeof(block_q6_0),
671672
.is_quantized = true,
672673
.to_float = (ggml_to_float_t) dequantize_row_q6_0,
673-
.from_float = quantize_row_q6_0,
674674
.from_float_ref = (ggml_from_float_t) quantize_row_q6_0_ref,
675675
},
676676
[GGML_TYPE_Q8_0] = {
@@ -5065,7 +5065,6 @@ struct ggml_tensor * ggml_opt_step_adamw(
50655065

50665066
////////////////////////////////////////////////////////////////////////////////
50675067

5068-
50695068
struct ggml_hash_set ggml_hash_set_new(size_t size) {
50705069
size = ggml_hash_size(size);
50715070
struct ggml_hash_set result;

otherarch/sdcpp/stable-diffusion.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ enum sd_type_t {
9595
SD_TYPE_Q4_0_8_8 = 33,
9696
SD_TYPE_TQ1_0 = 34,
9797
SD_TYPE_TQ2_0 = 35,
98+
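// Q6_0 is given an explicit, non-contiguous value; SD_TYPE_COUNT below therefore evaluates to 134, not 36. NOTE(review): confirm nothing sizes tables by SD_TYPE_COUNT assuming dense values.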
99+
SD_TYPE_Q6_0 = 133,
98100
SD_TYPE_COUNT,
99101
};
100102

0 commit comments

Comments
 (0)