Skip to content

Commit 0071605

Browse files
committed
tests: add mul_mat perf/functional tests for p021/nc vulkan shaders
1 parent af04481 commit 0071605

File tree

1 file changed

+27
-4
lines changed

1 file changed

+27
-4
lines changed

tests/test-backend-ops.cpp

Lines changed: 27 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1964,9 +1964,10 @@ struct test_mul_mat : public test_case {
19641964
const std::array<int64_t, 2> bs; // dims 3 and 4
19651965
const std::array<int64_t, 2> nr; // repeat in dims 3 and 4
19661966
const std::array<int64_t, 4> per; // permutation of dimensions
1967+
const bool v; // whether a is a non-contiguous view
19671968

19681969
std::string vars() override {
1969-
return VARS_TO_STR8(type_a, type_b, m, n, k, bs, nr, per);
1970+
return VARS_TO_STR9(type_a, type_b, m, n, k, bs, nr, per, v);
19701971
}
19711972

19721973
double max_nmse_err() override {
@@ -1986,8 +1987,9 @@ struct test_mul_mat : public test_case {
19861987
int64_t m = 32, int64_t n = 32, int64_t k = 32,
19871988
std::array<int64_t, 2> bs = {10, 10},
19881989
std::array<int64_t, 2> nr = {2, 2},
1989-
std::array<int64_t, 4> per = {0, 1, 2, 3})
1990-
: type_a(type_a), type_b(type_b), m(m), n(n), k(k), bs(bs), nr(nr), per(per) {}
1990+
std::array<int64_t, 4> per = {0, 1, 2, 3},
1991+
bool v = false)
1992+
: type_a(type_a), type_b(type_b), m(m), n(n), k(k), bs(bs), nr(nr), per(per), v(v) {}
19911993

19921994
ggml_tensor * build_graph(ggml_context * ctx) override {
19931995
// C^T = A * B^T: (k, m) * (k, n) => (m, n)
@@ -1997,6 +1999,7 @@ struct test_mul_mat : public test_case {
19971999
const int npermuted = (per[0] != 0) + (per[1] != 1) + (per[2] != 2) + (per[3] != 3);
19982000
if (npermuted > 0) {
19992001
GGML_ASSERT(npermuted == 2);
2002+
GGML_ASSERT(!v); // not handled
20002003
GGML_ASSERT(!ggml_is_quantized(type_a) || per[0] == 0);
20012004
GGML_ASSERT(!ggml_is_quantized(type_b) || per[0] == 0);
20022005

@@ -2020,7 +2023,13 @@ struct test_mul_mat : public test_case {
20202023
ggml_set_name(a, "a_permuted");
20212024
ggml_set_name(b, "b_permuted");
20222025
} else {
2023-
a = ggml_new_tensor_4d(ctx, type_a, k, m, bs[0], bs[1]);
2026+
2027+
if (v) {
2028+
a = ggml_new_tensor_4d(ctx, type_a, k*2, m, bs[0], bs[1]);
2029+
a = ggml_view_4d(ctx, a, k, m, bs[0], bs[1], a->nb[1], a->nb[2], a->nb[3], 0);
2030+
} else {
2031+
a = ggml_new_tensor_4d(ctx, type_a, k, m, bs[0], bs[1]);
2032+
}
20242033
b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0]*nr[0], bs[1]*nr[1]);
20252034
if (!ggml_is_quantized(type_a)) {
20262035
if (bs[1] == 1 && nr[1] == 1) {
@@ -4176,6 +4185,17 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
41764185
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 45, 128, { 8, 1}, {4, 1}));
41774186
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 45, 64, { 8, 1}, {4, 1}));
41784187

4188+
for (auto bs : {1,2,4,8}) {
4189+
for (auto nr : {1,4}) {
4190+
for (uint32_t m = 0; m < 2; ++m) {
4191+
for (uint32_t k = 0; k < 2; ++k) {
4192+
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056 + m, 1, 128 + k, {bs, 1}, {nr, 1}, {0, 2, 1, 3}));
4193+
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128 + m, 1, 1056 + k, {bs, 1}, {nr, 1}, {0, 1, 2, 3}, true));
4194+
}
4195+
}
4196+
}
4197+
}
4198+
41794199
// sycl backend will limit task global_range < MAX_INT
41804200
// test case for f16-type-convert-to-fp32 kernel with large k under fp32 compute dtype (occurs in stable-diffusion)
41814201
// however this case needs to alloc more memory which may fail in some devices (Intel Arc770, etc.)
@@ -4444,6 +4464,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
44444464
test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {1024, 10, 1, 1}));
44454465
test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32000, 512, 1, 1}));
44464466

4467+
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 16416, 1, 128, {8, 1}, {4, 1}, {0, 2, 1, 3}));
4468+
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 1, 16416, {8, 1}, {4, 1}, {0, 1, 2, 3}, true));
4469+
44474470
for (int bs : {1, 2, 3, 4, 5, 8, 512}) {
44484471
for (ggml_type type_a : all_types) {
44494472
for (ggml_type type_b : {GGML_TYPE_F32}) {

0 commit comments

Comments
 (0)