@@ -1964,9 +1964,10 @@ struct test_mul_mat : public test_case {
     const std::array<int64_t, 2> bs;  // dims 3 and 4
     const std::array<int64_t, 2> nr;  // repeat in dims 3 and 4
     const std::array<int64_t, 4> per; // permutation of dimensions
+    const bool v; // whether a is a non-contiguous view

     std::string vars() override {
-        return VARS_TO_STR8(type_a, type_b, m, n, k, bs, nr, per);
+        return VARS_TO_STR9(type_a, type_b, m, n, k, bs, nr, per, v);
     }

     double max_nmse_err() override {
@@ -1986,8 +1987,9 @@ struct test_mul_mat : public test_case {
             int64_t m = 32, int64_t n = 32, int64_t k = 32,
             std::array<int64_t, 2> bs = {10, 10},
             std::array<int64_t, 2> nr = {2, 2},
-            std::array<int64_t, 4> per = {0, 1, 2, 3})
-        : type_a(type_a), type_b(type_b), m(m), n(n), k(k), bs(bs), nr(nr), per(per) {}
+            std::array<int64_t, 4> per = {0, 1, 2, 3},
+            bool v = false)
+        : type_a(type_a), type_b(type_b), m(m), n(n), k(k), bs(bs), nr(nr), per(per), v(v) {}

     ggml_tensor * build_graph(ggml_context * ctx) override {
         // C^T = A * B^T: (k, m) * (k, n) => (m, n)
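Note that `v` defaults to `false`, so every existing `test_mul_mat` call site keeps compiling and behaving exactly as before; only the new cases added below opt in to the non-contiguous view.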
@@ -1997,6 +1999,7 @@ struct test_mul_mat : public test_case {
         const int npermuted = (per[0] != 0) + (per[1] != 1) + (per[2] != 2) + (per[3] != 3);
         if (npermuted > 0) {
             GGML_ASSERT(npermuted == 2);
+            GGML_ASSERT(!v); // not handled
             GGML_ASSERT(!ggml_is_quantized(type_a) || per[0] == 0);
             GGML_ASSERT(!ggml_is_quantized(type_b) || per[0] == 0);
@@ -2020,7 +2023,13 @@ struct test_mul_mat : public test_case {
             ggml_set_name(a, "a_permuted");
             ggml_set_name(b, "b_permuted");
         } else {
-            a = ggml_new_tensor_4d(ctx, type_a, k, m, bs[0], bs[1]);
+
+            if (v) {
+                a = ggml_new_tensor_4d(ctx, type_a, k*2, m, bs[0], bs[1]);
+                a = ggml_view_4d(ctx, a, k, m, bs[0], bs[1], a->nb[1], a->nb[2], a->nb[3], 0);
+            } else {
+                a = ggml_new_tensor_4d(ctx, type_a, k, m, bs[0], bs[1]);
+            }
             b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0]*nr[0], bs[1]*nr[1]);
             if (!ggml_is_quantized(type_a)) {
                 if (bs[1] == 1 && nr[1] == 1) {
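The `v` branch deserves a word: it over-allocates `a` with `2*k` columns and then takes a `k`-wide `ggml_view_4d` that keeps the parent's byte strides, so each row is followed by `k` elements of padding and the view is non-contiguous. A minimal sketch of the resulting layout, reusing the test's `ctx`, `k`, `m`, and `bs` and assuming F16 data:

    // Over-allocate 2*k columns, then view only the first k of each row.
    ggml_tensor * full = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, k*2, m, bs[0], bs[1]);
    // The view reuses the parent's byte strides nb[1..3] but halves ne[0],
    // leaving k unused elements of padding after every row.
    ggml_tensor * view = ggml_view_4d(ctx, full, k, m, bs[0], bs[1],
                                      full->nb[1], full->nb[2], full->nb[3], 0);
    GGML_ASSERT(view->nb[1] == 2*k*ggml_type_size(GGML_TYPE_F16)); // row stride spans 2*k elements
    GGML_ASSERT(!ggml_is_contiguous(view));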
@@ -4176,6 +4185,17 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32,  64, 45, 128, { 8, 1}, {4, 1}));
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 45,  64, { 8, 1}, {4, 1}));

+    for (auto bs : {1,2,4,8}) {
+        for (auto nr : {1,4}) {
+            for (uint32_t m = 0; m < 2; ++m) {
+                for (uint32_t k = 0; k < 2; ++k) {
+                    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056 + m, 1, 128 + k, {bs, 1}, {nr, 1}, {0, 2, 1, 3}));
+                    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128 + m, 1, 1056 + k, {bs, 1}, {nr, 1}, {0, 1, 2, 3}, true));
+                }
+            }
+        }
+    }
+
     // sycl backend will limit task global_range < MAX_INT
     // test case for f16-type-convert-to-fp32 kernel with large k under fp32 compute dtype (occurs in stable-diffusion)
     // however this case needs to alloc more memory which may fail in some devices (Intel Arc770, etc.)
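The new loops sweep batch size (`bs`), batch repeat (`nr`), and a one-element nudge on `m` and `k` (presumably to cover both exactly-tiled and off-by-one shapes), adding one permuted-batch case and one non-contiguous-view case per combination. Written out, the first iteration (bs = 1, nr = 1, m = 0, k = 0) is:

    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056, 1, 128, {1, 1}, {1, 1}, {0, 2, 1, 3}));       // dims 1 and 2 permuted
    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 1, 1056, {1, 1}, {1, 1}, {0, 1, 2, 3}, true)); // a is a non-contiguous view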
@@ -4444,6 +4464,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
     test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {1024, 10, 1, 1}));
     test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32000, 512, 1, 1}));

+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 16416, 1, 128, {8, 1}, {4, 1}, {0, 2, 1, 3}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 1, 16416, {8, 1}, {4, 1}, {0, 1, 2, 3}, true));
+
     for (int bs : {1, 2, 3, 4, 5, 8, 512}) {
         for (ggml_type type_a : all_types) {
             for (ggml_type type_b : {GGML_TYPE_F32}) {
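The two perf cases mirror the eval families above at a much larger size (16416 rather than 1056): one with dims 1 and 2 permuted and one with `a` as a non-contiguous view, so the affected matrix-multiplication paths can be benchmarked directly.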