 #include <ggml-backend.h>
 #include <ggml-cpp.h>
 
+#include "ggml-impl.h"
+
 #include <algorithm>
 #include <array>
 #include <cfloat>
@@ -1085,7 +1087,232 @@ struct test_case {
         }
     }
 
-    bool eval(ggml_backend_t backend1, ggml_backend_t backend2, const char * op_names_filter, printer * output_printer) {
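+    // local copy of the helper of the same name in ggml-backend.cpp: duplicate a
+    // tensor and copy over its strides so the duplicate has the same memory layout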
+    struct ggml_tensor * ggml_dup_tensor_layout(struct ggml_context * ctx, const struct ggml_tensor * tensor) {
+        struct ggml_tensor * dup = ggml_dup_tensor(ctx, tensor);
+        for (int i = 0; i < GGML_MAX_DIMS; i++) {
+            dup->nb[i] = tensor->nb[i];
+        }
+        return dup;
+    }
+
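+    // recursively duplicate a tensor and all of its sources into the copy contexts;
+    // hash_set/node_copies deduplicate nodes that are reached more than once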
+    struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies,
+            struct ggml_context * ctx_allocated, struct ggml_context * ctx_unallocated, struct ggml_tensor * src) {
+
+        GGML_ASSERT(src != NULL);
+        GGML_ASSERT(src->data && "graph must be allocated");
+
+        size_t id = ggml_hash_insert(&hash_set, src);
+        if (id == GGML_HASHSET_ALREADY_EXISTS) {
+            return node_copies[ggml_hash_find(&hash_set, src)];
+        }
+
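+        // non-view tensors that already have data go into the allocated context;
+        // views are duplicated into the unallocated context and re-initialized later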
+        struct ggml_tensor * dst = ggml_dup_tensor_layout(src->data && !src->view_src ? ctx_allocated : ctx_unallocated, src);
+        if (src->view_src != NULL) {
+            dst->view_src = graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, src->view_src);
+            dst->view_offs = src->view_offs;
+        }
+        dst->op = src->op;
+        memcpy(dst->op_params, src->op_params, sizeof(dst->op_params));
+        ggml_set_name(dst, src->name);
+
+        // copy src
+        for (int i = 0; i < GGML_MAX_SRC; i++) {
+            struct ggml_tensor * s = src->src[i];
+            if (s == NULL) {
+                continue;
+            }
+            dst->src[i] = graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, s);
+        }
+
+        node_copies[id] = dst;
+        return dst;
+    }
+
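+    // initialize a copied tensor: views are re-initialized on top of their copied
+    // view_src, all other tensors get their data copied from the original graph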
+    void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) {
+        size_t id = ggml_hash_find(hash_set, src);
+        if (node_init[id]) {
+            return;
+        }
+        node_init[id] = true;
+
+        struct ggml_tensor * dst = node_copies[id];
+        if (dst->view_src != NULL) {
+            graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
+            enum ggml_status status = ggml_backend_view_init(dst);
+            GGML_ASSERT(status == GGML_STATUS_SUCCESS);
+        } else {
+            ggml_backend_tensor_copy(src, dst);
+        }
+
+        // init src
+        for (int i = 0; i < GGML_MAX_SRC; i++) {
+            struct ggml_tensor * s = src->src[i];
+            if (s == NULL) {
+                continue;
+            }
+            graph_copy_init_tensor(hash_set, node_copies, node_init, s);
+        }
+    }
+
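+    // local version of ggml_backend_graph_copy from ggml-backend.cpp, extended with
+    // extra_buf_map so that op tensors can be re-initialized with extra buffer types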
+    struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph,
+            std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t> extra_buf_map) {
+        GGML_ASSERT(graph);
+        struct ggml_hash_set hash_set = ggml_hash_set_new(graph->visited_hash_set.size);
+        struct ggml_tensor ** node_copies = (ggml_tensor **) calloc(hash_set.size, sizeof(node_copies[0])); // NOLINT
+        bool * node_init = (bool *) calloc(hash_set.size, sizeof(node_init[0]));
+
+        struct ggml_init_params params = {
+            /* .mem_size   = */ ggml_tensor_overhead()*hash_set.size + ggml_graph_overhead_custom(graph->size, false),
+            /* .mem_buffer = */ NULL,
+            /* .no_alloc   = */ true
+        };
+
+        struct ggml_context * ctx_allocated   = ggml_init(params);
+        struct ggml_context * ctx_unallocated = ggml_init(params);
+
+        if (ctx_allocated == NULL || ctx_unallocated == NULL) {
+            GGML_LOG_ERROR("%s: failed to allocate context for graph copy\n", __func__);
+            ggml_hash_set_free(&hash_set);
+            free(node_copies);
+            free(node_init);
+            ggml_free(ctx_allocated);
+            ggml_free(ctx_unallocated);
+            return {
+                /* .buffer          = */ NULL,
+                /* .ctx_allocated   = */ NULL,
+                /* .ctx_unallocated = */ NULL,
+                /* .graph           = */ NULL,
+            };
+        }
+
+        // dup nodes
+        for (int i = 0; i < graph->n_nodes; i++) {
+            struct ggml_tensor * node = graph->nodes[i];
+            graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, node);
+        }
+
+        // allocate nodes
+        ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx_allocated, backend);
+
+        if (buffer == NULL) {
+            GGML_LOG_ERROR("%s: failed to allocate buffer for graph copy\n", __func__);
+            ggml_hash_set_free(&hash_set);
+            free(node_copies);
+            free(node_init);
+            ggml_free(ctx_allocated);
+            ggml_free(ctx_unallocated);
+            for (auto buft : extra_buf_map) {
+                ggml_backend_buffer_free(buft.second);
+            }
+            return {
+                /* .buffer          = */ NULL,
+                /* .ctx_allocated   = */ NULL,
+                /* .ctx_unallocated = */ NULL,
+                /* .graph           = */ NULL,
+            };
+        }
+
+        // printf("copy buffer size: %zu MB\n", ggml_backend_buffer_get_size(buffer) / 1024 / 1024);
+
+        // copy data and init views
+        for (int i = 0; i < graph->n_nodes; i++) {
+            struct ggml_tensor * node = graph->nodes[i];
+
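+            // let each extra buffer type (re-)initialize the copied op tensors, e.g. to set
+            // up a repacked layout, before graph_copy_init_tensor() copies the data over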
+            if (node->op != GGML_OP_NONE && node->src[0]) {
+                for (const auto & [buft, buf] : extra_buf_map) {
+                    size_t id = ggml_hash_find(&hash_set, node);
+                    ggml_status status = ggml_backend_buffer_init_tensor(buf, node_copies[id]);
+                    if (status != GGML_STATUS_SUCCESS) {
+                        GGML_LOG_ERROR("%s: failed to initialize tensor in extra buffer type '%s' for graph copy\n", __func__, ggml_backend_buft_name(buft));
+                    }
+                }
+            }
+
+            graph_copy_init_tensor(&hash_set, node_copies, node_init, node);
+        }
+
+        // build graph copy
+        struct ggml_cgraph * graph_copy = ggml_new_graph_custom(ctx_allocated, graph->size, false);
+        for (int i = 0; i < graph->n_nodes; i++) {
+            struct ggml_tensor * node = graph->nodes[i];
+            struct ggml_tensor * node_copy = node_copies[ggml_hash_find(&hash_set, node)];
+            graph_copy->nodes[i] = node_copy;
+        }
+        graph_copy->n_nodes = graph->n_nodes;
+
+        ggml_hash_set_free(&hash_set);
+        free(node_copies);
+        free(node_init);
+
+        return {
+            /* .buffer          = */ buffer,
+            /* .ctx_allocated   = */ ctx_allocated,
+            /* .ctx_unallocated = */ ctx_unallocated,
+            /* .graph           = */ graph_copy,
+        };
+    }
+
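+    // local version of ggml_backend_compare_graph_backend, extended with extra_buf_map:
+    // computes the graph on both backends and compares results via the callback, either
+    // node by node or, if test_node is given, only for that tensor after computing the
+    // whole graph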
+    bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2,
+            struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data,
+            struct ggml_tensor * test_node,
+            std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t> extra_buf_map) {
+        struct ggml_backend_graph_copy copy = ggml_backend_graph_copy(backend2, graph, extra_buf_map);
+        if (copy.buffer == NULL) {
+            return false;
+        }
+
+        struct ggml_cgraph * g1 = graph;
+        struct ggml_cgraph * g2 = copy.graph;
+
+        assert(g1->n_nodes == g2->n_nodes);
+
+        if (test_node != nullptr) {
+            // Compute the whole graph and only test the output for a specific tensor
+            ggml_backend_graph_compute(backend1, g1);
+            ggml_backend_graph_compute(backend2, g2);
+
+            int test_node_idx = -1;
+            for (int i = 0; i < g1->n_nodes; i++) {
+                struct ggml_tensor * t1 = g1->nodes[i];
+                if (t1 == test_node) {
+                    test_node_idx = i;
+                    break;
+                }
+            }
+            GGML_ASSERT(test_node_idx != -1);
+
+            callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
+        } else {
+            for (int i = 0; i < g1->n_nodes; i++) {
+                struct ggml_tensor * t1 = g1->nodes[i];
+                struct ggml_tensor * t2 = g2->nodes[i];
+
+                assert(t1->op == t2->op && ggml_are_same_layout(t1, t2));
+
+                struct ggml_cgraph g1v = ggml_graph_view(g1, i, i + 1);
+                struct ggml_cgraph g2v = ggml_graph_view(g2, i, i + 1);
+
+                ggml_backend_graph_compute(backend1, &g1v);
+                ggml_backend_graph_compute(backend2, &g2v);
+
+                if (ggml_is_view_op(t1->op)) {
+                    continue;
+                }
+
+                // compare results, calculate rms etc
+                if (!callback(i, t1, t2, user_data)) {
+                    break;
+                }
+            }
+        }
+        ggml_backend_graph_copy_free(copy);
+
+        return true;
+    }
+
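+    // extra_buf_map maps extra buffer types to pre-allocated buffers to exercise during
+    // the graph copy; callers that do not use extra buffer types pass {}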
+    bool eval(ggml_backend_t backend1, ggml_backend_t backend2, const char * op_names_filter, printer * output_printer,
+            std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t> extra_buf_map) {
         mode = MODE_TEST;
 
         ggml_init_params params = {
@@ -1225,7 +1452,8 @@ struct test_case {
             GGML_UNUSED(index);
         };
 
-        const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud, run_whole_graph() ? out : nullptr);
+        const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback,
+                &ud, run_whole_graph() ? out : nullptr, extra_buf_map);
 
         ggml_backend_buffer_free(buf);
 
@@ -6764,7 +6992,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
 
     size_t n_ok = 0;
     for (auto & test : test_cases) {
-        if (test->eval(backend, backend_cpu, op_names_filter, output_printer)) {
+        if (test->eval(backend, backend_cpu, op_names_filter, output_printer, {})) {
             n_ok++;
         }
     }
@@ -6923,7 +7151,34 @@ static void print_backend_features(ggml_backend_t backend) {
 static bool test_cpu_variant(const char * variant_name, const char * op_names_filter,
                              const char * params_filter, printer * output_printer) {
     std::string backend_ref_name = "CPU-ref";
-    ggml_backend_load_all_variants("cpu");
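+    // NOTE: assumes variant_name is of the form "CPU-<variant>" (cf. backend_ref_name
+    // above), so the leading "CPU-" (4 characters) is stripped to get the variant name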
+    std::string variant = std::string(variant_name).substr(4);
+    ggml_backend_load_variant("cpu", variant.c_str());
+
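+    // collect the extra buffer types exposed by the CPU backend (e.g. repack buffer
+    // types) and allocate a buffer for each so they can be passed to eval() below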
+    std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t> extra_buf_map;
+    {
+        auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+        auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
+
+        auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
+            ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts");
+        if (ggml_backend_dev_get_extra_bufts_fn) {
+            ggml_backend_buffer_type_t * extra_bufts = ggml_backend_dev_get_extra_bufts_fn(cpu_dev);
+            while (extra_bufts && *extra_bufts) {
+                // TODO: what should the size be here? do extra buffer types even need a size?
+                // we need a value larger than 0 to avoid the default buffer being used
+                extra_buf_map[*extra_bufts] = ggml_backend_buft_alloc_buffer(*extra_bufts, 1);
+                ++extra_bufts;
+            }
+        }
+    }
+
+    printf("\n");
+    for (auto buft : extra_buf_map) {
+        printf("Using extra buffer type: %s\n", ggml_backend_buft_name(buft.first));
+    }
+    printf("\n");
+
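+    // also load the reference CPU variant that the test results are compared against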
+    ggml_backend_load_variant("cpu", "ref");
 
     ggml_backend_t backend_ref = ggml_backend_init_by_name(backend_ref_name.c_str(), nullptr);
     if (backend_ref == nullptr) {
@@ -6941,6 +7196,8 @@ static bool test_cpu_variant(const char * variant_name, const char * op_names_fi
     }
     print_backend_features(backend_variant);
 
     printf("Testing CPU variant '%s' against '%s' backend...\n\n", variant_name, backend_ref_name.c_str());
 
     auto test_cases = make_test_cases_eval();
@@ -6962,7 +7219,7 @@ static bool test_cpu_variant(const char * variant_name, const char * op_names_fi
     for (auto & test : test_cases) {
         // Switch the order so that we copy from the reference backend to the
         // variant backend.
-        if (test->eval(backend_ref, backend_variant, op_names_filter, output_printer)) {
+        if (test->eval(backend_ref, backend_variant, op_names_filter, output_printer, extra_buf_map)) {
             n_ok++;
         }
     }
@@ -6972,6 +7229,10 @@ static bool test_cpu_variant(const char * variant_name, const char * op_names_fi
     ggml_backend_free(backend_variant);
     ggml_backend_free(backend_ref);
 
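+    // release the buffers allocated above for the extra buffer types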
+    for (auto buft : extra_buf_map) {
+        ggml_backend_buffer_free(buft.second);
+    }
+
     return n_ok == test_cases.size();
 }
 