@@ -4252,39 +4252,45 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
42524252#if 1
42534253 for (ggml_type type_a : base_types) {
42544254 for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
4255- // test cases without permutation
4256- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , 256 , {1 , 1 }, {1 , 1 }));
4257- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , 256 , {1 , 1 }, {2 , 1 }));
4258- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , 256 , {1 , 1 }, {1 , 2 }));
4259- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , 256 , {3 , 1 }, {1 , 1 }));
4260- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , 256 , {3 , 1 }, {2 , 1 }));
4261- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , 256 , {3 , 2 }, {1 , 1 }));
4262- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , 256 , {3 , 2 }, {2 , 1 }));
4263- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , 256 , {3 , 2 }, {1 , 2 }));
4264- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , 256 , {3 , 2 }, {2 , 2 }));
4265-
4266- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , 256 , {1 , 1 }, {1 , 1 }));
4267- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , 256 , {1 , 1 }, {2 , 1 }));
4268- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , 256 , {1 , 1 }, {1 , 2 }));
4269- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , 256 , {3 , 1 }, {1 , 1 }));
4270- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , 256 , {3 , 1 }, {2 , 1 }));
4271- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , 256 , {3 , 2 }, {1 , 1 }));
4272- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , 256 , {3 , 2 }, {2 , 1 }));
4273- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , 256 , {3 , 2 }, {1 , 2 }));
4274- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , 256 , {3 , 2 }, {2 , 2 }));
4275-
4276- // test cases with permutation
4277- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , 256 , {2 , 3 }, {1 , 1 }, {0 , 2 , 1 , 3 }));
4278- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , 256 , {2 , 3 }, {1 , 1 }, {0 , 1 , 3 , 2 }));
4279- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , 256 , {2 , 3 }, {1 , 1 }, {0 , 3 , 2 , 1 }));
4280-
4281- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 8 , 256 , {2 , 3 }, {1 , 1 }, {0 , 2 , 1 , 3 }));
4282- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 8 , 256 , {2 , 3 }, {1 , 1 }, {0 , 1 , 3 , 2 }));
4283- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 8 , 256 , {2 , 3 }, {1 , 1 }, {0 , 3 , 2 , 1 }));
4284-
4285- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , 256 , {2 , 3 }, {1 , 1 }, {0 , 2 , 1 , 3 }));
4286- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , 256 , {2 , 3 }, {1 , 1 }, {0 , 1 , 3 , 2 }));
4287- test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , 256 , {2 , 3 }, {1 , 1 }, {0 , 3 , 2 , 1 }));
4255+ std::vector<int > ks = { 256 };
4256+ if (ggml_blck_size (type_a) == 1 ) {
4257+ ks.push_back (4 );
4258+ }
4259+ for (auto k : ks) {
4260+ // test cases without permutation
4261+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , k, {1 , 1 }, {1 , 1 }));
4262+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , k, {1 , 1 }, {2 , 1 }));
4263+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , k, {1 , 1 }, {1 , 2 }));
4264+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , k, {3 , 1 }, {1 , 1 }));
4265+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , k, {3 , 1 }, {2 , 1 }));
4266+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , k, {3 , 2 }, {1 , 1 }));
4267+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , k, {3 , 2 }, {2 , 1 }));
4268+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , k, {3 , 2 }, {1 , 2 }));
4269+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , k, {3 , 2 }, {2 , 2 }));
4270+
4271+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , k, {1 , 1 }, {1 , 1 }));
4272+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , k, {1 , 1 }, {2 , 1 }));
4273+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , k, {1 , 1 }, {1 , 2 }));
4274+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , k, {3 , 1 }, {1 , 1 }));
4275+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , k, {3 , 1 }, {2 , 1 }));
4276+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , k, {3 , 2 }, {1 , 1 }));
4277+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , k, {3 , 2 }, {2 , 1 }));
4278+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , k, {3 , 2 }, {1 , 2 }));
4279+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , k, {3 , 2 }, {2 , 2 }));
4280+
4281+ // test cases with permutation
4282+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , k, {2 , 3 }, {1 , 1 }, {0 , 2 , 1 , 3 }));
4283+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , k, {2 , 3 }, {1 , 1 }, {0 , 1 , 3 , 2 }));
4284+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , k, {2 , 3 }, {1 , 1 }, {0 , 3 , 2 , 1 }));
4285+
4286+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 8 , k, {2 , 3 }, {1 , 1 }, {0 , 2 , 1 , 3 }));
4287+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 8 , k, {2 , 3 }, {1 , 1 }, {0 , 1 , 3 , 2 }));
4288+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 8 , k, {2 , 3 }, {1 , 1 }, {0 , 3 , 2 , 1 }));
4289+
4290+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , k, {2 , 3 }, {1 , 1 }, {0 , 2 , 1 , 3 }));
4291+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , k, {2 , 3 }, {1 , 1 }, {0 , 1 , 3 , 2 }));
4292+ test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 16 , k, {2 , 3 }, {1 , 1 }, {0 , 3 , 2 , 1 }));
4293+ }
42884294
42894295 // test cases with large ne00/ne10 to cover stream-k fixup
42904296 test_cases.emplace_back (new test_mul_mat (type_a, type_b, 16 , 1 , 1024 , {3 , 2 }, {1 , 1 }));
0 commit comments