Skip to content

Commit 7831b42

Browse files
committed
ggml : add repack testing support
This commit adds support for testing the ggml-cpu repack feature, which enables the repackaging of quantized data into a more optimal layout for matrix multiplication for specific hardware architectures.

The motivation is to enable the testing of a cpu backend that uses repacked data against a reference cpu backend that does not use repacked data.

Building:
```console
$ cmake -B build \
    -DGGML_CPU_REF_BACKEND=ON -DGGML_BACKEND_DL=ON \
    -DGGML_CPU_ALL_VARIANTS=ON
```

List available cpu architectures/variants:
```console
$ ./build/bin/test-backend-ops cpu-variants --list
CPU variants:
  CPU-alderlake - 12th Gen Intel(R) Core(TM) i7-1260P
```

Run tests:
```console
./build-ref/bin/test-backend-ops cpu-variants \
    --variant CPU-alderlake \
    -o "MUL_MAT(type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1)"
Testing CPU variant 'CPU-alderlake' against cpu-ref backend...

repack: repack tensor a with q4_0_8x8
MUL_MAT(type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1): OK
repack: repack tensor a with q4_0_8x8
MUL_MAT(type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1): OK
14491/14491 tests passed
```

All matrix multiplication tests can be run by specifying `-o "MUL_MAT"`, but it may be harder to spot the ones that use repacking.
1 parent ec07008 commit 7831b42

File tree

8 files changed

+436
-41
lines changed

8 files changed

+436
-41
lines changed

ggml/CMakeLists.txt

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -252,9 +252,9 @@ set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
252252
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
253253

254254
# extra artifacts
255-
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
256-
option(GGML_CPU_REF "ggml: build reference CPU backend for testing" OFF)
257-
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
255+
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
256+
option(GGML_CPU_REF_BACKEND "ggml: build reference CPU backend for testing" OFF)
257+
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
258258

259259
#
260260
# dependencies
@@ -284,7 +284,9 @@ add_subdirectory(src)
284284

285285
if (GGML_BUILD_TESTS)
286286
enable_testing()
287-
add_subdirectory(tests)
287+
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tests")
288+
add_subdirectory(tests)
289+
endif ()
288290
endif ()
289291

290292
if (GGML_BUILD_EXAMPLES)

ggml/include/ggml-backend.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,9 @@ extern "C" {
243243
// Load all known backends from dynamic libraries
244244
GGML_API void ggml_backend_load_all(void);
245245
GGML_API void ggml_backend_load_all_from_path(const char * dir_path);
246+
// Load all variants for a backend and register them
247+
GGML_API void ggml_backend_load_all_variants(const char * name);
248+
GGML_API void ggml_backend_load_variant(const char * name, const char * variant);
246249

247250
//
248251
// Backend scheduler

ggml/src/CMakeLists.txt

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,21 @@ if (GGML_CPU_REF_BACKEND)
397397
set(GGML_CPU_HBM OFF)
398398
set(GGML_OPENMP OFF)
399399
set(GGML_CPU_KLEIDIAI OFF)
400-
ggml_add_cpu_backend_variant_impl(ref)
400+
set(GGML_CPU_REPACK OFF)
401+
set(GGML_ACCELERATE OFF)
402+
403+
ggml_add_cpu_backend_variant(ref)
404+
405+
if (CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM|AARCH64")
406+
target_compile_options(ggml-cpu-ref PRIVATE
407+
-U__ARM_NEON
408+
-U__ARM_FEATURE_FMA
409+
-U__ARM_FEATURE_FP16_VECTOR_ARITHMETIC
410+
-U__ARM_FEATURE_DOTPROD
411+
-U__ARM_FEATURE_MATMUL_INT8
412+
-U__ARM_FEATURE_SVE
413+
)
414+
endif()
401415
target_compile_definitions(ggml PRIVATE GGML_USE_CPU_REF)
402416
endif()
403417

ggml/src/ggml-backend-reg.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,3 +609,72 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
609609
ggml_backend_load_best("cpu-ref", silent, dir_path);
610610
#endif
611611
}
612+
613+
void ggml_backend_load_all_variants(const char * name) {
614+
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
615+
const fs::path name_path = fs::u8path(name);
616+
const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
617+
const fs::path file_extension = backend_filename_extension();
618+
619+
std::vector<fs::path> search_paths;
620+
#ifdef GGML_BACKEND_DIR
621+
search_paths.push_back(fs::u8path(GGML_BACKEND_DIR));
622+
#endif
623+
// default search paths: executable directory, current directory
624+
search_paths.push_back(get_executable_path());
625+
search_paths.push_back(fs::current_path());
626+
627+
for (const auto & search_path : search_paths) {
628+
if (!fs::exists(search_path)) {
629+
GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
630+
continue;
631+
}
632+
fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
633+
for (const auto & entry : dir_it) {
634+
if (entry.is_regular_file()) {
635+
auto filename = entry.path().filename();
636+
auto ext = entry.path().extension();
637+
if (filename.native().find(file_prefix.native()) == 0 && ext == file_extension) {
638+
fs::path path = search_path / filename;
639+
ggml_backend_reg_t backend = get_reg().load_backend(path, false);
640+
if (backend == nullptr) {
641+
GGML_LOG_ERROR("%s: failed to load backend variant %s\n", __func__, path_str(entry.path()).c_str());
642+
}
643+
644+
}
645+
}
646+
}
647+
}
648+
}
649+
650+
void ggml_backend_load_variant(const char * name, const char * variant) {
651+
const fs::path name_path = fs::u8path(name);
652+
const fs::path variant_path = fs::u8path(variant);
653+
const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
654+
const fs::path target_filename = file_prefix.native() + variant_path.native() + backend_filename_extension().native();
655+
656+
std::vector<fs::path> search_paths;
657+
#ifdef GGML_BACKEND_DIR
658+
search_paths.push_back(fs::u8path(GGML_BACKEND_DIR));
659+
#endif
660+
// default search paths: executable directory, current directory
661+
search_paths.push_back(get_executable_path());
662+
search_paths.push_back(fs::current_path());
663+
664+
for (const auto & search_path : search_paths) {
665+
if (!fs::exists(search_path)) {
666+
GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
667+
continue;
668+
}
669+
670+
fs::path full_path = search_path / target_filename;
671+
if (fs::exists(full_path) && fs::is_regular_file(full_path)) {
672+
ggml_backend_reg_t backend = get_reg().load_backend(full_path, false);
673+
if (backend == nullptr) {
674+
GGML_LOG_ERROR("%s: failed to load backend variant %s\n", __func__, path_str(full_path).c_str());
675+
} else {
676+
return;
677+
}
678+
}
679+
}
680+
}

ggml/src/ggml-cpu/ggml-cpu.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ struct ggml_backend_cpu_context {
108108
};
109109

110110
static const char * ggml_backend_cpu_get_name(ggml_backend_t backend) {
111-
return "CPU";
111+
return GGML_CPU_VARIANT_NAME;
112112

113113
GGML_UNUSED(backend);
114114
}

ggml/src/ggml-cpu/repack.cpp

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1869,8 +1869,43 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons
18691869
return nullptr;
18701870
}
18711871

1872+
static bool supports_tensor(const struct ggml_tensor * op) {
1873+
if (op->op == GGML_OP_MUL_MAT &&
1874+
op->src[0]->buffer &&
1875+
(ggml_n_dims(op->src[0]) == 2) && ggml_repack_get_optimal_repack_type(op->src[0])) {
1876+
1877+
if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) {
1878+
return false;
1879+
}
1880+
1881+
if (op->src[1]->type == GGML_TYPE_F32) {
1882+
return true;
1883+
}
1884+
1885+
} else if (op->op == GGML_OP_MUL_MAT_ID && op->src[0]->buffer &&
1886+
(ggml_n_dims(op->src[0]) == 3) && ggml_repack_get_optimal_repack_type(op->src[0])) {
1887+
1888+
if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) {
1889+
return false;
1890+
}
1891+
1892+
if (op->src[1]->type == GGML_TYPE_F32) {
1893+
return true;
1894+
}
1895+
}
1896+
return false;
1897+
}
1898+
18721899
static enum ggml_status ggml_backend_cpu_repack_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
1873-
tensor->extra = (void *) const_cast<ggml::cpu::tensor_traits *>(ggml_repack_get_optimal_repack_type(tensor));
1900+
if (tensor->op == GGML_OP_NONE) {
1901+
tensor->extra = (void *) const_cast<ggml::cpu::tensor_traits *>(ggml_repack_get_optimal_repack_type(tensor));
1902+
tensor->buffer = buffer;
1903+
}
1904+
1905+
if (supports_tensor(tensor)) {
1906+
tensor->src[0]->extra = (void *) const_cast<ggml::cpu::tensor_traits *>(ggml_repack_get_optimal_repack_type(tensor->src[0]));
1907+
tensor->src[0]->buffer = buffer;
1908+
}
18741909

18751910
GGML_UNUSED(buffer);
18761911
return GGML_STATUS_SUCCESS;

tests/CMakeLists.txt

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -202,15 +202,7 @@ if (NOT LLAMA_SANITIZE_ADDRESS)
202202
endif()
203203
llama_build_and_test(test-gguf.cpp)
204204
llama_build_and_test(test-backend-ops.cpp)
205-
206-
if (GGML_CPU_REF_BACKEND)
207-
if (WIN32)
208-
set(GGML_CPU_REF_BACKEND_PATH "${CMAKE_BINARY_DIR}/bin/ggml-cpu-ref.dll")
209-
else()
210-
set(GGML_CPU_REF_BACKEND_PATH "${CMAKE_BINARY_DIR}/bin/libggml-cpu-ref.so")
211-
endif()
212-
target_compile_definitions(test-backend-ops PRIVATE GGML_CPU_REF_BACKEND_PATH="${GGML_CPU_REF_BACKEND_PATH}")
213-
endif()
205+
target_include_directories(test-backend-ops PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src)
214206

215207
llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
216208
llama_build_and_test(test-autorelease.cpp LABEL "model")

0 commit comments

Comments
 (0)