
Commit 398a2f9

tests : copy ggml graph copy functions for backend ops tests
This commit copies a few of the ggml graph copy functions into test-backend-ops.cpp to allow more control over how extra backend buffer types are exercised when testing backend ops.
1 parent 67bb90c commit 398a2f9
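
A minimal sketch of how the new plumbing is meant to be driven (the helper name collect_extra_bufts and its placement are illustrative, not part of the commit): the test harness gathers the CPU device's extra buffer types into a map from buffer type to a small pre-allocated buffer, and passes that map to test_case::eval(), which forwards it to the copied graph-copy helpers. All of the ggml API calls below also appear in the diff itself.

#include <unordered_map>
#include <ggml-backend.h>

// Sketch: collect the CPU device's extra buffer types into the map that
// test_case::eval() now accepts as its last parameter.
static std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t> collect_extra_bufts() {
    std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t> extra_buf_map;

    ggml_backend_dev_t cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    ggml_backend_reg_t cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);

    // the extra-buffer-type query is exposed through the registry's proc-address table
    auto get_extra_bufts = (ggml_backend_dev_get_extra_bufts_t)
        ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts");
    if (get_extra_bufts) {
        ggml_backend_buffer_type_t * bufts = get_extra_bufts(cpu_dev);
        while (bufts && *bufts) {
            // a 1-byte buffer is enough here: it only has to exist so that graph-copy
            // tensor initialization can target this buffer type instead of the default one
            extra_buf_map[*bufts] = ggml_backend_buft_alloc_buffer(*bufts, 1);
            ++bufts;
        }
    }
    return extra_buf_map;
}

// usage: the buffers are owned by the caller and must be released with
// ggml_backend_buffer_free() once the tests are done, e.g.
//   auto extra_buf_map = collect_extra_bufts();
//   test->eval(backend_ref, backend_variant, op_names_filter, output_printer, extra_buf_map);
//   for (auto & kv : extra_buf_map) ggml_backend_buffer_free(kv.second);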

File tree: 2 files changed, +267 / -5 lines


tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -198,6 +198,7 @@ if (NOT LLAMA_SANITIZE_ADDRESS)
 endif()
 llama_build_and_test(test-gguf.cpp)
 llama_build_and_test(test-backend-ops.cpp)
+target_include_directories(test-backend-ops PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src)

 llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
 llama_build_and_test(test-autorelease.cpp LABEL "model")

tests/test-backend-ops.cpp

Lines changed: 266 additions & 5 deletions
@@ -20,6 +20,8 @@
 #include <ggml-backend.h>
 #include <ggml-cpp.h>

+#include "ggml-impl.h"
+
 #include <algorithm>
 #include <array>
 #include <cfloat>
@@ -1085,7 +1087,232 @@ struct test_case {
         }
     }

-    bool eval(ggml_backend_t backend1, ggml_backend_t backend2, const char * op_names_filter, printer * output_printer) {
+    struct ggml_tensor * ggml_dup_tensor_layout(struct ggml_context * ctx, const struct ggml_tensor * tensor) {
+        struct ggml_tensor * dup = ggml_dup_tensor(ctx, tensor);
+        for (int i = 0; i < GGML_MAX_DIMS; i++) {
+            dup->nb[i] = tensor->nb[i];
+        }
+        return dup;
+    }
+
+    struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies,
+            struct ggml_context * ctx_allocated, struct ggml_context * ctx_unallocated, struct ggml_tensor * src) {
+
+        GGML_ASSERT(src != NULL);
+        GGML_ASSERT(src->data && "graph must be allocated");
+
+        size_t id = ggml_hash_insert(&hash_set, src);
+        if (id == GGML_HASHSET_ALREADY_EXISTS) {
+            return node_copies[ggml_hash_find(&hash_set, src)];
+        }
+
+        struct ggml_tensor * dst = ggml_dup_tensor_layout(src->data && !src->view_src ? ctx_allocated : ctx_unallocated, src);
+        if (src->view_src != NULL) {
+            dst->view_src = graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, src->view_src);
+            dst->view_offs = src->view_offs;
+        }
+        dst->op = src->op;
+        memcpy(dst->op_params, src->op_params, sizeof(dst->op_params));
+        ggml_set_name(dst, src->name);
+
+        // copy src
+        for (int i = 0; i < GGML_MAX_SRC; i++) {
+            struct ggml_tensor * s = src->src[i];
+            if (s == NULL) {
+                continue;
+            }
+            dst->src[i] = graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, s);
+        }
+
+        node_copies[id] = dst;
+        return dst;
+    }
+
+    void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) {
+        size_t id = ggml_hash_find(hash_set, src);
+        if (node_init[id]) {
+            return;
+        }
+        node_init[id] = true;
+
+        struct ggml_tensor * dst = node_copies[id];
+        if (dst->view_src != NULL) {
+            graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
+            enum ggml_status status = ggml_backend_view_init(dst);
+            GGML_ASSERT(status == GGML_STATUS_SUCCESS);
+        }
+        else {
+            ggml_backend_tensor_copy(src, dst);
+        }
+
+        // init src
+        for (int i = 0; i < GGML_MAX_SRC; i++) {
+            struct ggml_tensor * s = src->src[i];
+            if (s == NULL) {
+                continue;
+            }
+            graph_copy_init_tensor(hash_set, node_copies, node_init, s);
+        }
+    }
+
+    struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph,
+            std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t> extra_buf_map) {
+        GGML_ASSERT(graph);
+        struct ggml_hash_set hash_set = ggml_hash_set_new(graph->visited_hash_set.size);
+        struct ggml_tensor ** node_copies = (ggml_tensor **) calloc(hash_set.size, sizeof(node_copies[0])); // NOLINT
+        bool * node_init = (bool *) calloc(hash_set.size, sizeof(node_init[0]));
+
+        struct ggml_init_params params = {
+            /* .mem_size = */ ggml_tensor_overhead()*hash_set.size + ggml_graph_overhead_custom(graph->size, false),
+            /* .mem_buffer = */ NULL,
+            /* .no_alloc = */ true
+        };
+
+        struct ggml_context * ctx_allocated = ggml_init(params);
+        struct ggml_context * ctx_unallocated = ggml_init(params);
+
+        if (ctx_allocated == NULL || ctx_unallocated == NULL) {
+            GGML_LOG_ERROR("%s: failed to allocate context for graph copy\n", __func__);
+            ggml_hash_set_free(&hash_set);
+            free(node_copies);
+            free(node_init);
+            ggml_free(ctx_allocated);
+            ggml_free(ctx_unallocated);
+            return {
+                /* .buffer = */ NULL,
+                /* .ctx_allocated = */ NULL,
+                /* .ctx_unallocated = */ NULL,
+                /* .graph = */ NULL,
+            };
+        }
+
+        // dup nodes
+        for (int i = 0; i < graph->n_nodes; i++) {
+            struct ggml_tensor * node = graph->nodes[i];
+            graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, node);
+        }
+
+        // allocate nodes
+        ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx_allocated, backend);
+
+        if (buffer == NULL) {
+            GGML_LOG_ERROR("%s: failed to allocate buffer for graph copy\n", __func__);
+            ggml_hash_set_free(&hash_set);
+            free(node_copies);
+            free(node_init);
+            ggml_free(ctx_allocated);
+            ggml_free(ctx_unallocated);
+            for (auto buft : extra_buf_map) {
+                ggml_backend_buffer_free(buft.second);
+            }
+            return {
+                /* .buffer = */ NULL,
+                /* .ctx_allocated = */ NULL,
+                /* .ctx_unallocated = */ NULL,
+                /* .graph = */ NULL,
+            };
+        }
+
+        //printf("copy buffer size: %zu MB\n", ggml_backend_buffer_get_size(buffer) / 1024 / 1024);
+
+        // copy data and init views
+        for (int i = 0; i < graph->n_nodes; i++) {
+            struct ggml_tensor * node = graph->nodes[i];
+
+            if (node->op != GGML_OP_NONE && node->src[0]) {
+                for (const auto& [buft, buf] : extra_buf_map) {
+                    size_t id = ggml_hash_find(&hash_set, node);
+                    ggml_status status = ggml_backend_buffer_init_tensor(buf, node_copies[id]);
+                    if (status != GGML_STATUS_SUCCESS) {
+                        GGML_LOG_ERROR("%s: failed to initialize tensor in extra buffer type '%s' for graph copy\n", __func__, ggml_backend_buft_name(buft));
+                    }
+                }
+            }
+
+            graph_copy_init_tensor(&hash_set, node_copies, node_init, node);
+        }
+
+        // build graph copy
+        struct ggml_cgraph * graph_copy = ggml_new_graph_custom(ctx_allocated, graph->size, false);
+        for (int i = 0; i < graph->n_nodes; i++) {
+            struct ggml_tensor * node = graph->nodes[i];
+            struct ggml_tensor * node_copy = node_copies[ggml_hash_find(&hash_set, node)];
+            graph_copy->nodes[i] = node_copy;
+        }
+        graph_copy->n_nodes = graph->n_nodes;
+
+        ggml_hash_set_free(&hash_set);
+        free(node_copies);
+        free(node_init);
+
+        return {
+            /* .buffer = */ buffer,
+            /* .ctx_allocated = */ ctx_allocated,
+            /* .ctx_unallocated = */ ctx_unallocated,
+            /* .graph = */ graph_copy,
+        };
+    }
+
+    bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2,
+            struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data,
+            struct ggml_tensor * test_node,
+            std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t> extra_buf_map) {
+        struct ggml_backend_graph_copy copy = ggml_backend_graph_copy(backend2, graph, extra_buf_map);
+        if (copy.buffer == NULL) {
+            return false;
+        }
+
+        struct ggml_cgraph * g1 = graph;
+        struct ggml_cgraph * g2 = copy.graph;
+
+        assert(g1->n_nodes == g2->n_nodes);
+
+        if (test_node != nullptr) {
+            // Compute the whole graph and only test the output for a specific tensor
+            ggml_backend_graph_compute(backend1, g1);
+            ggml_backend_graph_compute(backend2, g2);
+
+            int test_node_idx = -1;
+            for (int i = 0; i < g1->n_nodes; i++) {
+                struct ggml_tensor * t1 = g1->nodes[i];
+                if (t1 == test_node) {
+                    test_node_idx = i;
+                    break;
+                }
+            }
+            GGML_ASSERT(test_node_idx != -1);
+
+            callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
+        } else {
+            for (int i = 0; i < g1->n_nodes; i++) {
+                struct ggml_tensor * t1 = g1->nodes[i];
+                struct ggml_tensor * t2 = g2->nodes[i];
+
+                assert(t1->op == t2->op && ggml_are_same_layout(t1, t2));
+
+                struct ggml_cgraph g1v = ggml_graph_view(g1, i, i + 1);
+                struct ggml_cgraph g2v = ggml_graph_view(g2, i, i + 1);
+
+                ggml_backend_graph_compute(backend1, &g1v);
+                ggml_backend_graph_compute(backend2, &g2v);
+
+                if (ggml_is_view_op(t1->op)) {
+                    continue;
+                }
+
+                // compare results, calculate rms etc
+                if (!callback(i, t1, t2, user_data)) {
+                    break;
+                }
+            }
+        }
+        ggml_backend_graph_copy_free(copy);
+
+        return true;
+    }
+
+    bool eval(ggml_backend_t backend1, ggml_backend_t backend2, const char * op_names_filter, printer * output_printer,
+              std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t> extra_buf_map) {
         mode = MODE_TEST;

         ggml_init_params params = {
@@ -1225,7 +1452,8 @@ struct test_case {
             GGML_UNUSED(index);
         };

-        const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud, run_whole_graph() ? out : nullptr);
+        const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback,
+                                                               &ud, run_whole_graph() ? out : nullptr, extra_buf_map);

         ggml_backend_buffer_free(buf);

@@ -6764,7 +6992,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op

     size_t n_ok = 0;
     for (auto & test : test_cases) {
-        if (test->eval(backend, backend_cpu, op_names_filter, output_printer)) {
+        if (test->eval(backend, backend_cpu, op_names_filter, output_printer, {})) {
             n_ok++;
         }
     }
@@ -6923,7 +7151,34 @@ static void print_backend_features(ggml_backend_t backend) {
 static bool test_cpu_variant(const char * variant_name, const char * op_names_filter,
                              const char * params_filter, printer * output_printer) {
     std::string backend_ref_name = "CPU-ref";
-    ggml_backend_load_all_variants("cpu");
+    std::string variant = std::string(variant_name).substr(4);
+    ggml_backend_load_variant("cpu", variant.c_str());
+
+    std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t> extra_buf_map;
+    {
+        auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+        auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
+
+        auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
+            ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts");
+        if (ggml_backend_dev_get_extra_bufts_fn) {
+            ggml_backend_buffer_type_t * extra_bufts = ggml_backend_dev_get_extra_bufts_fn(cpu_dev);
+            while (extra_bufts && *extra_bufts) {
+                // TODO: What should the size be here? Do extra buffer types need a size at all?
+                // We need a value larger than 0 to avoid the default buffer being used.
+                extra_buf_map[*extra_bufts] = ggml_backend_buft_alloc_buffer(*extra_bufts, 1);
+                ++extra_bufts;
+            }
+        }
+    }
+
+    printf("\n");
+    for (auto buft : extra_buf_map) {
+        printf("Using extra buffer type: %s\n", ggml_backend_buft_name(buft.first));
+    }
+    printf("\n");
+
+    ggml_backend_load_variant("cpu", "ref");

     ggml_backend_t backend_ref = ggml_backend_init_by_name(backend_ref_name.c_str(), nullptr);
     if (backend_ref == nullptr) {
@@ -6941,6 +7196,8 @@ static bool test_cpu_variant(const char * variant_name, const char * op_names_fi
     }
     print_backend_features(backend_variant);

+
+
     printf("Testing CPU variant '%s' against '%s' backend...\n\n", variant_name, backend_ref_name.c_str());

     auto test_cases = make_test_cases_eval();
@@ -6962,7 +7219,7 @@ static bool test_cpu_variant(const char * variant_name, const char * op_names_fi
     for (auto & test : test_cases) {
         // Switch the order so that we copy from the reference backend to the
         // variant backend.
-        if (test->eval(backend_ref, backend_variant, op_names_filter, output_printer)) {
+        if (test->eval(backend_ref, backend_variant, op_names_filter, output_printer, extra_buf_map)) {
             n_ok++;
         }
     }
@@ -6972,6 +7229,10 @@ static bool test_cpu_variant(const char * variant_name, const char * op_names_fi
     ggml_backend_free(backend_variant);
     ggml_backend_free(backend_ref);

+    for (auto buft : extra_buf_map) {
+        ggml_backend_buffer_free(buft.second);
+    }
+
     return n_ok == test_cases.size();
 }

