Commit 61ee32d
tests: set tensor usage as weight for weight tensors

only for mul_mat and mul_mat_id ops

Signed-off-by: Aaron Teo <[email protected]>

1 parent 1926e07 · commit 61ee32d

1 file changed: +116 -9 lines

tests/test-backend-ops.cpp

@@ -1160,6 +1160,9 @@ struct test_case {
 
     std::vector<ggml_tensor *> sentinels;
 
+    // Track weight tensors for separate buffer allocation with GGML_BACKEND_BUFFER_USAGE_WEIGHTS
+    std::vector<ggml_tensor *> weight_tensors;
+
     std::string current_op_name;
 
     void add_sentinel(ggml_context * ctx) {
@@ -1238,6 +1241,8 @@ struct test_case {
                       const char * op_names_filter,
                       printer * output_printer) {
        mode = MODE_TEST;
+       weight_tensors.clear();
+       sentinels.clear();
 
        ggml_init_params params = {
            /* .mem_size = */ ggml_tensor_overhead()*128 + ggml_graph_overhead(),
@@ -1288,10 +1293,35 @@ struct test_case {
        // post-graph sentinel
        add_sentinel(ctx);
 
-       // allocate
+       // allocate weight tensors in a separate buffer with GGML_BACKEND_BUFFER_USAGE_WEIGHTS
+       ggml_backend_buffer_t weights_buf = nullptr;
+       if (!weight_tensors.empty()) {
+           // Calculate total size needed for weight tensors
+           size_t weight_size = 0;
+           for (ggml_tensor * wt : weight_tensors) {
+               weight_size += ggml_backend_buft_get_alloc_size(ggml_backend_get_default_buffer_type(backend1), wt);
+           }
+           weight_size = GGML_PAD(weight_size, ggml_backend_buft_get_alignment(ggml_backend_get_default_buffer_type(backend1)));
+
+           weights_buf = ggml_backend_buft_alloc_buffer(ggml_backend_get_default_buffer_type(backend1), weight_size);
+           if (weights_buf == NULL) {
+               printf("failed to allocate weight tensors [%s] ", ggml_backend_name(backend1));
+               ggml_free(ctx);
+               return test_status_t::FAIL;
+           }
+           ggml_backend_buffer_set_usage(weights_buf, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
+
+           // Allocate each weight tensor in the weights buffer
+           ggml_tallocr weights_talloc = ggml_tallocr_new(weights_buf);
+           for (ggml_tensor * wt : weight_tensors) {
+               ggml_tallocr_alloc(&weights_talloc, wt);
+           }
+       }
+
+       // allocate remaining tensors
        ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend1);
 
-       if (buf == NULL) {
+       if (buf == NULL && weights_buf == NULL) {
            printf("failed to allocate tensors [%s] ", ggml_backend_name(backend1));
            ggml_free(ctx);
            return test_status_t::FAIL;
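
Note: the hunk above boils down to a reusable pattern: sum the allocation sizes of the tracked weight tensors for the backend's default buffer type, pad the total to that buffer type's alignment, allocate a single buffer, tag it with GGML_BACKEND_BUFFER_USAGE_WEIGHTS so the backend treats its contents as model weights, and place each tensor with the linear ggml_tallocr allocator. A minimal sketch of the pattern as a standalone helper follows; the helper name alloc_weights_buffer is made up for illustration, while the ggml calls are exactly the ones the hunk itself uses:

    // Hypothetical helper distilling the allocation pattern from the hunk above.
    // Assumes ggml.h, ggml-alloc.h and ggml-backend.h are included, as
    // test-backend-ops.cpp already does.
    static ggml_backend_buffer_t alloc_weights_buffer(ggml_backend_t backend,
                                                      const std::vector<ggml_tensor *> & weights) {
        ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend);

        // sum per-tensor allocation sizes, then pad the total to the buffer
        // type's alignment (the same sizing the hunk uses)
        size_t size = 0;
        for (ggml_tensor * w : weights) {
            size += ggml_backend_buft_get_alloc_size(buft, w);
        }
        size = GGML_PAD(size, ggml_backend_buft_get_alignment(buft));

        ggml_backend_buffer_t buf = ggml_backend_buft_alloc_buffer(buft, size);
        if (buf == nullptr) {
            return nullptr;
        }

        // mark the buffer as holding weights so backends may apply
        // weight-specific handling (e.g. repacking) to tensors placed in it
        ggml_backend_buffer_set_usage(buf, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);

        // ggml_tallocr is a linear allocator over a single buffer: each call
        // places the tensor at the next suitably aligned offset
        ggml_tallocr talloc = ggml_tallocr_new(buf);
        for (ggml_tensor * w : weights) {
            ggml_tallocr_alloc(&talloc, w);
        }
        return buf;
    }
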
@@ -1385,6 +1415,9 @@ struct test_case {
 
        const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud, run_whole_graph() ? out : nullptr);
 
+       if (weights_buf) {
+           ggml_backend_buffer_free(weights_buf);
+       }
        ggml_backend_buffer_free(buf);
 
        ggml_free(ctx);
@@ -1404,6 +1437,7 @@ struct test_case {
 
    bool eval_perf(ggml_backend_t backend, const char * op_names_filter, printer * output_printer) {
        mode = MODE_PERF;
+       weight_tensors.clear();
 
        static const size_t graph_nodes = 8192;
 
@@ -1432,10 +1466,34 @@ struct test_case {
            return true;
        }
 
-       // allocate
+       // allocate weight tensors in a separate buffer with GGML_BACKEND_BUFFER_USAGE_WEIGHTS
+       ggml_backend_buffer_ptr weights_buf(nullptr); // smart ptr
+       if (!weight_tensors.empty()) {
+           // Calculate total size needed for weight tensors
+           size_t weight_size = 0;
+           for (ggml_tensor * wt : weight_tensors) {
+               weight_size += ggml_backend_buft_get_alloc_size(ggml_backend_get_default_buffer_type(backend), wt);
+           }
+           weight_size = GGML_PAD(weight_size, ggml_backend_buft_get_alignment(ggml_backend_get_default_buffer_type(backend)));
+
+           weights_buf.reset(ggml_backend_buft_alloc_buffer(ggml_backend_get_default_buffer_type(backend), weight_size));
+           if (weights_buf == NULL) {
+               printf("failed to allocate weight tensors\n");
+               return false;
+           }
+           ggml_backend_buffer_set_usage(weights_buf.get(), GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
+
+           // Allocate each weight tensor in the weights buffer
+           ggml_tallocr weights_talloc = ggml_tallocr_new(weights_buf.get());
+           for (ggml_tensor * wt : weight_tensors) {
+               ggml_tallocr_alloc(&weights_talloc, wt);
+           }
+       }
+
+       // allocate remaining tensors
        ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
 
-       if (buf == NULL) {
+       if (buf == NULL && weights_buf == NULL) {
            printf("failed to allocate tensors\n");
            return false;
        }
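
Unlike the MODE_TEST path above, which frees weights_buf by hand, this perf path (and the grad path below) holds the buffer in ggml_backend_buffer_ptr, the std::unique_ptr alias from ggml-cpp.h whose deleter calls ggml_backend_buffer_free, so the buffer is released automatically on every early return. Roughly, as a sketch of the alias rather than its exact spelling:

    // approximate shape of the alias defined in ggml-cpp.h
    struct ggml_backend_buffer_deleter {
        void operator()(ggml_backend_buffer_t buf) { ggml_backend_buffer_free(buf); }
    };
    typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr;
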
@@ -1534,6 +1592,7 @@ struct test_case {
 
    bool eval_support(ggml_backend_t backend, const char * op_names_filter, printer * output_printer) {
        mode = MODE_SUPPORT;
+       weight_tensors.clear();
 
        static const size_t graph_nodes = 8192;
 
@@ -1569,6 +1628,7 @@ struct test_case {
 
    bool eval_grad(ggml_backend_t backend, const char * op_names_filter, printer * output_printer) {
        mode = MODE_GRAD;
+       weight_tensors.clear();
        const std::vector<float> expect = grad_expect();
 
        ggml_init_params params = {
@@ -1679,9 +1739,35 @@ struct test_case {
            return true;
        }
 
-       // allocate
+       // allocate weight tensors in a separate buffer with GGML_BACKEND_BUFFER_USAGE_WEIGHTS
+       ggml_backend_buffer_ptr weights_buf(nullptr); // smart ptr
+       if (!weight_tensors.empty()) {
+           // Calculate total size needed for weight tensors
+           size_t weight_size = 0;
+           for (ggml_tensor * wt : weight_tensors) {
+               weight_size += ggml_backend_buft_get_alloc_size(ggml_backend_get_default_buffer_type(backend), wt);
+           }
+           weight_size = GGML_PAD(weight_size, ggml_backend_buft_get_alignment(ggml_backend_get_default_buffer_type(backend)));
+
+           weights_buf.reset(ggml_backend_buft_alloc_buffer(ggml_backend_get_default_buffer_type(backend), weight_size));
+           if (weights_buf == NULL) {
+               test_operation_info info(op_desc(out), vars(), ggml_backend_name(backend));
+               info.set_error("weight allocation", "");
+               output_printer->print_operation(info);
+               return false;
+           }
+           ggml_backend_buffer_set_usage(weights_buf.get(), GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
+
+           // Allocate each weight tensor in the weights buffer
+           ggml_tallocr weights_talloc = ggml_tallocr_new(weights_buf.get());
+           for (ggml_tensor * wt : weight_tensors) {
+               ggml_tallocr_alloc(&weights_talloc, wt);
+           }
+       }
+
+       // allocate remaining tensors
        ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
-       if (buf == NULL) {
+       if (buf == NULL && weights_buf == NULL) {
            test_operation_info info(op_desc(out), vars(), ggml_backend_name(backend));
            info.set_error("allocation", "");
            output_printer->print_operation(info);
@@ -3606,6 +3692,7 @@ struct test_mul_mat : public test_case {
 
            a = ggml_new_tensor_4d(ctx, type_a, ne_a[per[0]], ne_a[per[1]], ne_a[per[2]], ne_a[per[3]]);
            b = ggml_new_tensor_4d(ctx, type_b, ne_b[per[0]], ne_b[per[1]], ne_b[per[2]], ne_b[per[3]]);
+           weight_tensors.push_back(a); // Track weight tensor for GGML_BACKEND_BUFFER_USAGE_WEIGHTS
            if (!ggml_is_quantized(type_a)) {
                if (bs[1] == 1 && nr[1] == 1) {
                    ggml_set_param(a);
@@ -3623,6 +3710,7 @@ struct test_mul_mat : public test_case {
            const int64_t k_physical = k_v == 0 ? k : k_v;
            a = ggml_new_tensor_4d(ctx, type_a, k_physical, m, bs[0], bs[1]);
            b = ggml_new_tensor_4d(ctx, type_b, k_physical, n, bs[0]*nr[0], bs[1]*nr[1]);
+           weight_tensors.push_back(a); // Track weight tensor for GGML_BACKEND_BUFFER_USAGE_WEIGHTS
 
            if (!ggml_is_quantized(type_a)) {
                if (bs[1] == 1 && nr[1] == 1) {
@@ -3716,6 +3804,7 @@ struct test_mul_mat_id : public test_case {
        // C^T = A * B^T: (k, m) * (k, n) => (m, n)
        ggml_tensor * as = ggml_new_tensor_3d(ctx, type_a, k, m, n_mats);
        ggml_set_name(as, "as");
+       weight_tensors.push_back(as); // Track weight tensor for GGML_BACKEND_BUFFER_USAGE_WEIGHTS
 
        ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_mats, n);
        ggml_set_name(ids, "ids");
@@ -3776,6 +3865,7 @@ struct test_mul_mat_id_fusion : public test_case {
        // C^T = A * B^T: (k, m) * (k, n) => (m, n)
        ggml_tensor * as = ggml_new_tensor_3d(ctx, type_a, k, m, n_mats);
        ggml_set_name(as, "as");
+       weight_tensors.push_back(as); // Track weight tensor for GGML_BACKEND_BUFFER_USAGE_WEIGHTS
 
        ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_mats, n);
        ggml_set_name(ids, "ids");
@@ -3792,6 +3882,7 @@ struct test_mul_mat_id_fusion : public test_case {
 
        for (uint32_t i = 1; i < o; ++i) {
            ggml_tensor * a2 = ggml_new_tensor_3d(ctx, type_a, k, m, n_mats);
+           weight_tensors.push_back(a2); // Track weight tensor for GGML_BACKEND_BUFFER_USAGE_WEIGHTS
            ggml_tensor * out2 = ggml_mul_mat_id(ctx, a2, b, ids);
            ggml_set_name(out2, "out2");
            out = ggml_add(ctx, out, out2);
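
With the weight_tensors member in place, opting a test into the weights buffer is a single push_back in build_graph(), as the hunks above show for mul_mat and mul_mat_id. A stripped-down illustration following the file's test_case pattern (test_weight_example is a hypothetical name, not a case from this commit):

    // Hypothetical test case: the static operand of a mul_mat is recorded so
    // the eval_* paths allocate it in the GGML_BACKEND_BUFFER_USAGE_WEIGHTS buffer.
    struct test_weight_example : public test_case {
        ggml_tensor * build_graph(ggml_context * ctx) override {
            ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 64, 64); // weight
            ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 16); // activation
            weight_tensors.push_back(a);
            return ggml_mul_mat(ctx, a, b);
        }
    };
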
@@ -7861,9 +7952,24 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
    test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 30, 30, 7, 1 }, { 8, 30, 7, 1 }));
    test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 42, 42, 5, 2 }, { 10, 42, 5, 2 }));
    test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 2, 2 }, { 10, 64, 2, 2 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 2, 2 }, { 64, 64, 2, 2 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 79, 79, 5, 3 }, { 417, 79, 5, 3 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 2 }, { 32, 128, 4, 2 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 80, 80, 2, 8 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 79, 80, 2, 8 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 81, 80, 2, 8 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 80, 80, 8, 8 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 79, 80, 8, 8 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 81, 80, 8, 8 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 84, 84, 4, 4 }, { 32, 84, 4, 4 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 95, 95, 8, 8 }, { 40, 95, 8, 8 }));
    test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 100, 100, 4, 4 }, { 41, 100, 4, 4 }));
    test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 4 }, { 31, 128, 4, 4 }));
-   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 300, 64, 4, 4 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 4 }, { 32, 128, 4, 4 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 3, 4 }, { 32, 128, 3, 4 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 1 }, { 32, 128, 4, 1 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 200, 64, 4, 4 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 384, 64, 4, 4 }));
 
    for (bool v : {false, true}) {
        for (bool circular : {false, true}) {
@@ -8064,12 +8170,13 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 16416, 1, 128, {8, 1}, {4, 1}, {0, 2, 1, 3}));
    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 1, 16416, {8, 1}, {4, 1}, {0, 1, 2, 3}, 2*16416));
 
-   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 2 }, { 6, 64, 4, 2 }));
-   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 1 }, { 8, 128, 4, 1 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 32, 64, 4, 4 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 2 }, { 32, 128, 4, 2 }));
    // qwen3next with CHUNK_SIZE 64
    test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 8, 32 }, { 64, 64, 8, 32 }));
    // qwen3next with CHUNK_SIZE 128
    test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 32 }, { 128, 128, 4, 32 }));
+   test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 256, 256, 4, 2 }, { 128, 256, 4, 2 }));
 
    test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_LOWER, GGML_TYPE_F32, { 256, 256, 4, 4 }));
    test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_UPPER_DIAG, GGML_TYPE_F32, { 1024, 1024, 8, 4 }));
