3636#include < thread>
3737#include < vector>
3838
39- #include < iostream>
40-
4139static void init_tensor_uniform (ggml_tensor * tensor, float min = -1 .0f , float max = 1 .0f ) {
4240 size_t nels = ggml_nelements (tensor);
4341 std::vector<float > data (nels);
@@ -49,8 +47,8 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
4947 std::random_device rd;
5048 std::vector<std::default_random_engine> vec;
5149 vec.reserve (n_threads);
52- for (size_t i = 0 ; i < n_threads; i++) { vec.emplace_back (1234 + i); } // fixed seed
53- // for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(rd()); }
50+ // for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(1234 + i); } // fixed seed
51+ for (size_t i = 0 ; i < n_threads; i++) { vec.emplace_back (rd ()); }
5452 return vec;
5553 }();
5654
@@ -561,54 +559,6 @@ struct test_case {
561559 }
562560 }
563561
564- struct err_t {
565- float a_val, b_val, err;
566- size_t i;
567- };
568- std::vector<err_t > top_k_abs_err;
569- std::vector<err_t > top_k_rel_err;
570- size_t k = 10 ;
571- auto a = f1.data ();
572- auto b = f2.data (); // ref (cpu backend)
573- auto save_top_k_err = [=](size_t i, float a_i, float b_i, float err, std::vector<err_t >& top_k_err) {
574- if (top_k_err.size () < k) {
575- top_k_err.push_back ({a_i, b_i, err, i});
576- if (top_k_err.size () == k) {
577- std::sort (top_k_err.begin (), top_k_err.end (), [](const err_t & x, const err_t & y) {
578- return x.err > y.err ;
579- });
580- }
581- } else if (top_k_err.back ().err < err) {
582- top_k_err.back () = {a_i, b_i, err, i};
583- std::sort (top_k_err.begin (), top_k_err.end (), [](const err_t & x, const err_t & y) {
584- return x.err > y.err ;
585- });
586- }
587- };
588- double avg_abs_err = 0 .f ;
589- double avg_rel_err = 0 .f ;
590- for (size_t i = 0 ; i < f1.size (); i++) {
591- float a_i = a[i];
592- float b_i = b[i];
593- float abs_err = std::fabs (a_i - b_i);
594- float rel_err = (a_i - b_i) / std::fabs (b_i);
595- save_top_k_err (i, a_i, b_i, abs_err, top_k_abs_err);
596- save_top_k_err (i, a_i, b_i, rel_err, top_k_rel_err);
597- avg_abs_err += abs_err;
598- avg_rel_err += rel_err;
599- }
600- avg_abs_err /= f1.size ();
601- avg_rel_err /= f1.size ();
602- std::cout << " \n Avg abs err=" << avg_abs_err << " Top " << k << " abs err:\n " ;
603- for (const auto & err : top_k_abs_err) {
604- std::cout << " i=" << err.i << " a=" << err.a_val << " b=" << err.b_val << " abs err=" << err.err << " \n " ;
605- }
606- std::cout << " \n Avg rel err=" << avg_rel_err << " Top " << k << " rel err:\n " ;
607- for (const auto & err : top_k_rel_err) {
608- std::cout << " i=" << err.i << " a=" << err.a_val << " b=" << err.b_val << " rel err=" << err.err << " \n " ;
609- }
610- std::cout << std::endl;
611-
612562 double err = nmse (f1.data (), f2.data (), f1.size ());
613563 if (err > ud->max_err ) {
614564 printf (" [%s] NMSE = %.9f > %.9f " , ggml_op_desc (t1), err, ud->max_err );
@@ -2121,7 +2071,7 @@ struct test_mul_mat_id : public test_case {
21212071 const ggml_type type_b;
21222072 const int n_mats;
21232073 const int n_used;
2124- const bool b; // brodcast b matrix
2074+ const bool b; // broadcast b matrix
21252075 const int64_t m;
21262076 const int64_t n;
21272077 const int64_t k;
@@ -2656,6 +2606,8 @@ struct test_rope : public test_case {
26562606 } else {
26572607 out = ggml_rope_ext_back (ctx, a, pos, freq, n_dims, mode, 0 , 10000 .0f , fs, ef, af, 1 .0f , 1 .0f );
26582608 }
2609+
2610+ // TODO: add test with a non-contiguous view as input ; this case is needed for build_rope_2d in clip.cpp
26592611 }
26602612 ggml_set_name (out, " out" );
26612613
@@ -4195,13 +4147,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
41954147 test_cases.emplace_back (new test_mul_mat (type_a, GGML_TYPE_F32, 16 , i, 256 , { 1 , 1 }, {1 , 1 }));
41964148 }
41974149 }
4198- // TODO: Romain
4199- test_cases.emplace_back (new test_mul_mat (GGML_TYPE_Q4_0, GGML_TYPE_F32, 11008 , 1 , 4096 , {1 , 1 }, {1 , 1 }));
4200- test_cases.emplace_back (new test_mul_mat (GGML_TYPE_Q4_0, GGML_TYPE_F32, 11008 , 2 , 4096 , {1 , 1 }, {1 , 1 }));
4201- test_cases.emplace_back (new test_mul_mat (GGML_TYPE_Q4_0, GGML_TYPE_F32, 4096 , 1 , 11008 , {1 , 1 }, {1 , 1 }));
4202- test_cases.emplace_back (new test_mul_mat (GGML_TYPE_Q4_0, GGML_TYPE_F32, 4096 , 1 , 4096 , {1 , 1 }, {1 , 1 }));
4203- test_cases.emplace_back (new test_mul_mat (GGML_TYPE_Q4_0, GGML_TYPE_F32, 4096 , 2 , 11008 , {1 , 1 }, {1 , 1 }));
4204- test_cases.emplace_back (new test_mul_mat (GGML_TYPE_Q4_0, GGML_TYPE_F32, 4096 , 2 , 4096 , {1 , 1 }, {1 , 1 }));
42054150
42064151#if 1
42074152 for (ggml_type type_a : base_types) {
@@ -4485,10 +4430,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
44854430 test_cases.emplace_back (new test_timestep_embedding ());
44864431 test_cases.emplace_back (new test_leaky_relu ());
44874432
4488- for (int hsk : { 64 , 80 , 128 , 192 , 256 , }) {
4489- for (int hsv : { 64 , 80 , 128 , 192 , 256 , }) {
4490- if (hsk != 192 && hsk != hsv) continue ;
4433+ for (int hsk : { 64 , 80 , 128 , 192 , 256 , 576 }) {
4434+ for (int hsv : { 64 , 80 , 128 , 192 , 256 , 512 }) {
4435+ if (hsk != 192 && hsk != 576 && hsk != hsv) continue ;
44914436 if (hsk == 192 && (hsv != 128 && hsv != 192 )) continue ;
4437+ if (hsk == 576 && hsv != 512 ) continue ; // DeepSeek MLA
44924438
44934439 for (bool mask : { true , false } ) {
44944440 for (float max_bias : { 0 .0f , 8 .0f }) {
0 commit comments