Skip to content

Commit 7745263

Browse files
committed
ggml : add CPU backend reference implementation (wip)
This commit introduces a CPU reference implementation for GGML, designed primarily for testing and validation purposes. The motivation for this addition is to have a pure C CPU backend implementation that does not use any hardware-specific optimizations or intrinsics. This allows the CPU backend variants to be tested against the reference implementation to ensure correctness.
1 parent 138c87c commit 7745263

File tree

7 files changed

+141
-2
lines changed

7 files changed

+141
-2
lines changed

ggml/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file
255255

256256
# extra artifacts
257257
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
258+
# Named GGML_CPU_REF_BACKEND so that it matches the `if (GGML_CPU_REF_BACKEND)` checks in
# ggml/src/CMakeLists.txt and tests/CMakeLists.txt; a differently named option would never enable them.
option(GGML_CPU_REF_BACKEND "ggml: build reference CPU backend for testing" OFF)
258259
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
259260

260261
#

ggml/src/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,15 @@ ggml_add_backend(WebGPU)
388388
ggml_add_backend(zDNN)
389389
ggml_add_backend(OpenCL)
390390

391+
if (GGML_CPU_REF_BACKEND)
392+
if (NOT GGML_BACKEND_DL)
393+
message(FATAL_ERROR "GGML_CPU_REF_BACKEND requires GGML_BACKEND_DL")
394+
endif()
395+
# NOTE(review): this sets a CMake variable literally named DGGML_CPU_GENERIC. The leading "D" looks like
# a leftover from a `-DGGML_CPU_GENERIC` command-line flag; confirm which variable or compile definition
# the ref variant is meant to set, since nothing visible in this diff reads DGGML_CPU_GENERIC.
set(DGGML_CPU_GENERIC ON)
396+
ggml_add_cpu_backend_variant_impl(ref)
397+
target_compile_definitions(ggml PRIVATE GGML_USE_CPU_REF)
398+
endif()
399+
391400
foreach (target ggml-base ggml)
392401
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
393402
target_compile_features (${target} PRIVATE c_std_11 cxx_std_17) # don't bump

ggml/src/ggml-backend-reg.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,4 +596,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
596596
if (backend_path) {
597597
ggml_backend_load(backend_path);
598598
}
599+
#ifdef GGML_USE_CPU_REF
600+
ggml_backend_load_best("cpu-ref", silent, dir_path);
601+
#endif
599602
}

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
5252
target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
5353
target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
5454

55+
if (tag_name)
56+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU-${tag_name}")
57+
else()
58+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU")
59+
endif()
60+
5561
if (APPLE AND GGML_ACCELERATE)
5662
find_library(ACCELERATE_FRAMEWORK Accelerate)
5763
if (ACCELERATE_FRAMEWORK)

ggml/src/ggml-cpu/ggml-cpu.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ struct ggml_backend_cpu_device_context {
327327
};
328328

329329
// Returns the device name reported by this CPU backend build.
// After this change the name comes from the GGML_CPU_VARIANT_NAME compile definition instead of the
// hard-coded "CPU"; ggml/src/ggml-cpu/CMakeLists.txt in this same diff defines it as "CPU-<tag_name>"
// when a variant tag is given and as "CPU" otherwise.
static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) {
330-
return "CPU";
330+
return GGML_CPU_VARIANT_NAME;
331331

332332
// dev is not needed to produce the name
GGML_UNUSED(dev);
333333
}

tests/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,15 @@ endif()
199199
llama_build_and_test(test-gguf.cpp)
200200
llama_build_and_test(test-backend-ops.cpp)
201201

202+
if (GGML_CPU_REF_BACKEND)
203+
if (WIN32)
204+
set(GGML_CPU_REF_BACKEND_PATH "${CMAKE_BINARY_DIR}/bin/ggml-cpu-ref.dll")
205+
else()
206+
set(GGML_CPU_REF_BACKEND_PATH "${CMAKE_BINARY_DIR}/bin/libggml-cpu-ref.so")
207+
endif()
208+
target_compile_definitions(test-backend-ops PRIVATE GGML_CPU_REF_BACKEND_PATH="${GGML_CPU_REF_BACKEND_PATH}")
209+
endif()
210+
202211
llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
203212
llama_build_and_test(test-autorelease.cpp LABEL "model")
204213

tests/test-backend-ops.cpp

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include <string_view>
4040
#include <thread>
4141
#include <vector>
42+
#include <unordered_map>
4243

4344
static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
4445
size_t nels = ggml_nelements(tensor);
@@ -324,6 +325,7 @@ enum test_mode {
324325
MODE_PERF,
325326
MODE_GRAD,
326327
MODE_SUPPORT,
328+
MODE_CPU_VARIANTS,
327329
};
328330

329331
// Output format support similar to llama-bench
@@ -6906,18 +6908,99 @@ static void show_test_coverage() {
69066908
printf(" Coverage: %.1f%%\n", (double)covered_ops.size() / all_ops.size() * 100.0);
69076909
}
69086910

6911+
// Runs the eval test cases on the CPU backend named `variant_name`, passing the "CPU-ref" backend
// as the second backend to each test's eval().
//
// variant_name    - backend name handed to ggml_backend_init_by_name()
// op_names_filter - forwarded unchanged to each test's eval(); may be nullptr
// params_filter   - optional regex; test cases whose vars() string does not match are dropped
// output_printer  - receives the per-test output (via eval) and the final summary
//
// Returns true only if every remaining test case's eval() returned true. Returns false early
// (after printing an error) if either backend cannot be initialized.
static bool test_cpu_variant(const char * variant_name, const char * op_names_filter,
6912+
const char * params_filter, printer * output_printer) {
6913+
6914+
// the reference backend is looked up by the fixed name "CPU-ref"
ggml_backend_t backend_ref = ggml_backend_init_by_name("CPU-ref", nullptr);
6915+
if (backend_ref == nullptr) {
6916+
printf("Error: CPU-ref backend not found. Make sure it's built and available.\n");
6917+
return false;
6918+
}
6919+
6920+
ggml_backend_t backend_variant = ggml_backend_init_by_name(variant_name, nullptr);
6921+
if (backend_variant == nullptr) {
6922+
printf("Error: CPU variant '%s' not found or failed to initialize.\n", variant_name);
6923+
printf("Use --list to see available variants.\n");
6924+
// the reference backend was already created above, so release it before bailing out
ggml_backend_free(backend_ref);
6925+
return false;
6926+
}
6927+
6928+
printf("Testing CPU variant '%s' against cpu-ref backend...\n\n", variant_name);
6929+
6930+
auto test_cases = make_test_cases_eval();
6931+
6932+
// keep only the test cases whose parameter string matches the user-supplied regex
if (params_filter != nullptr) {
6933+
// NOTE(review): std::regex's constructor throws std::regex_error on a malformed pattern and
// nothing in this function catches it, so both backends would be left unfreed in that case.
std::regex regex(params_filter);
6934+
auto it = test_cases.begin();
6935+
while (it != test_cases.end()) {
6936+
std::string test_params = (*it)->vars();
6937+
if (!std::regex_search(test_params, regex)) {
6938+
// erase() returns the iterator to the next element, so do not advance here
it = test_cases.erase(it);
6939+
} else {
6940+
it++;
6941+
}
6942+
}
6943+
}
6944+
6945+
// count the test cases for which eval() reports success
size_t n_ok = 0;
6946+
for (auto & test : test_cases) {
6947+
if (test->eval(backend_variant, backend_ref, op_names_filter, output_printer)) {
6948+
n_ok++;
6949+
}
6950+
}
6951+
6952+
output_printer->print_summary(test_summary_info(n_ok, test_cases.size(), false));
6953+
6954+
ggml_backend_free(backend_variant);
6955+
ggml_backend_free(backend_ref);
6956+
6957+
return n_ok == test_cases.size();
6958+
}
6959+
6960+
// Prints the name and description of every device exposed by a backend registry whose name
// contains "CPU", skipping the device named "CPU-ref". If no such device is found, prints a
// hint about the CMake flags needed to build CPU variants instead.
static void list_cpu_variants() {
6961+
// device name -> device description; emplace() keeps the first entry if a name repeats
std::unordered_map<std::string, std::string> variant_names;
6962+
for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
6963+
ggml_backend_reg_t reg = ggml_backend_reg_get(i);
6964+
// substring match on the registry name, so any registry with "CPU" in its name is included
if (strstr(ggml_backend_reg_name(reg), "CPU") != nullptr) {
6965+
for (size_t j = 0; j < ggml_backend_reg_dev_count(reg); j++) {
6966+
ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, j);
6967+
const char * name = ggml_backend_dev_name(dev);
6968+
// the reference backend itself is not listed as a variant
if (strcmp(name, "CPU-ref") != 0) {
6969+
variant_names.emplace(name, ggml_backend_dev_description(dev));
6970+
}
6971+
}
6972+
}
6973+
}
6974+
6975+
if (variant_names.size() == 0) {
6976+
printf("No CPU backend variants found. To enable CPU variants, rebuild with:\n");
6977+
printf(" cmake -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON\n");
6978+
return;
6979+
}
6980+
6981+
printf("CPU variants:\n");
6982+
// unordered_map iteration order is unspecified, so the listing order is not stable
for (const auto & it : variant_names) {
6983+
printf(" %-15s - %s\n", it.first.c_str(), it.second.c_str());
6984+
}
6985+
}
6986+
69096987
// Prints the command-line help text, using argv[0] as the program name.
// NOTE(review): the help text advertises `--list-cpu-variants`, but the argument-parsing hunks
// visible in this diff only add handlers for `--list` and `--variant`. Confirm that
// `--list-cpu-variants` is parsed somewhere, or drop it from the usage text.
static void usage(char ** argv) {
6910-
printf("Usage: %s [mode] [-o <op,..>] [-b <backend>] [-p <params regex>] [--output <console|sql|csv>] [--list-ops] [--show-coverage]\n", argv[0]);
6988+
printf("Usage: %s [mode] [-o <op,..>] [-b <backend>] [-p <params regex>] [--output <console|sql|csv>] [--list-ops] [--list-cpu-variants] [--show-coverage]\n", argv[0]);
69116989
printf(" valid modes:\n");
69126990
printf(" - test (default, compare with CPU backend for correctness)\n");
69136991
printf(" - grad (compare gradients from backpropagation with method of finite differences)\n");
69146992
printf(" - perf (performance evaluation)\n");
69156993
printf(" - support (probe backend operation support)\n");
6994+
printf(" - cpu-variants (test CPU variants against cpu-ref backend)\n");
69166995
printf(" op names for -o are as given by ggml_op_desc() (e.g. ADD, MUL_MAT, etc),\n");
69176996
printf(" optionally including the full test case string (e.g. \"ADD(type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1)\")\n");
69186997
printf(" --output specifies output format (default: console, options: console, sql, csv)\n");
69196998
printf(" --list-ops lists all available GGML operations\n");
6999+
printf(" --list-cpu-variants lists all available CPU backend variants\n");
69207000
printf(" --show-coverage shows test coverage\n");
7001+
// options that only take effect when the mode is cpu-variants
printf(" cpu-variants mode options:\n");
7002+
printf(" --list lists available CPU variants on this system\n");
7003+
printf(" --variant <name> test specific CPU variant against cpu-ref backend\n");
69217004
}
69227005

69237006
int main(int argc, char ** argv) {
@@ -6926,6 +7009,8 @@ int main(int argc, char ** argv) {
69267009
const char * op_names_filter = nullptr;
69277010
const char * backend_filter = nullptr;
69287011
const char * params_filter = nullptr;
7012+
const char * cpu_variant_name = nullptr;
7013+
bool list_variants_flag = false;
69297014

69307015
for (int i = 1; i < argc; i++) {
69317016
if (strcmp(argv[i], "test") == 0) {
@@ -6936,6 +7021,8 @@ int main(int argc, char ** argv) {
69367021
mode = MODE_GRAD;
69377022
} else if (strcmp(argv[i], "support") == 0) {
69387023
mode = MODE_SUPPORT;
7024+
} else if (strcmp(argv[i], "cpu-variants") == 0) {
7025+
mode = MODE_CPU_VARIANTS;
69397026
} else if (strcmp(argv[i], "-o") == 0) {
69407027
if (i + 1 < argc) {
69417028
op_names_filter = argv[++i];
@@ -6970,6 +7057,15 @@ int main(int argc, char ** argv) {
69707057
} else if (strcmp(argv[i], "--list-ops") == 0) {
69717058
list_all_ops();
69727059
return 0;
7060+
} else if (strcmp(argv[i], "--list") == 0) {
7061+
list_variants_flag = true;
7062+
} else if (strcmp(argv[i], "--variant") == 0) {
7063+
if (i + 1 < argc) {
7064+
cpu_variant_name = argv[++i];
7065+
} else {
7066+
usage(argv);
7067+
return 1;
7068+
}
69737069
} else if (strcmp(argv[i], "--show-coverage") == 0) {
69747070
show_test_coverage();
69757071
return 0;
@@ -6988,6 +7084,21 @@ int main(int argc, char ** argv) {
69887084
output_printer->print_header();
69897085
}
69907086

7087+
if (mode == MODE_CPU_VARIANTS) {
7088+
if (list_variants_flag) {
7089+
list_cpu_variants();
7090+
return 0;
7091+
}
7092+
7093+
if (cpu_variant_name == nullptr) {
7094+
printf("Error: cpu-variants mode requires --variant <name> or --list\n");
7095+
usage(argv);
7096+
return 1;
7097+
}
7098+
7099+
return test_cpu_variant(cpu_variant_name, op_names_filter, params_filter, output_printer.get()) ? 0 : 1;
7100+
}
7101+
69917102
output_printer->print_testing_start(testing_start_info(ggml_backend_dev_count()));
69927103

69937104
size_t n_ok = 0;

0 commit comments

Comments
 (0)