@@ -1154,9 +1154,9 @@ struct test_case {
             double err = nmse(f1.data(), f2.data(), f1.size());
             if (err > ud->max_err) {
                 printf("[%s] NMSE = %.9f > %.9f ", ggml_op_desc(t1), err, ud->max_err);
-                // for (int i = 0; i < (int) f1.size(); i++) {
-                //     printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
-                // }
+                for (int i = 0; i < (int) f1.size(); i++) {
+                    printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
+                }
                 printf("\n");
                 // exit(1);
                 ud->ok = false;
@@ -2953,6 +2953,7 @@ struct test_mul_mat : public test_case {
         return out;
     }
 
+#if 0
     void initialize_tensors(ggml_context * ctx) override {
         for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
             if (t->type == GGML_TYPE_F32) {
@@ -2976,16 +2977,30 @@ struct test_mul_mat : public test_case {
                     for (int64_t r = 0; r < ggml_nrows(t); r++) {
                         std::vector<float> data(t->ne[0]);
                         for (int i = 0; i < t->ne[0]; i++) {
-                            data[i] = (1.33f * i + r) / (t->ne[0] + t->ne[1] * 1.2);
+                            //data[i] = (1.33f * i + r) / (t->ne[0] + t->ne[1] * 1.2);
+                            data[i] = 1.0f;
                         }
                         ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float));
                     }
                 }
-            } else {
+            } /* else if (t->type == GGML_TYPE_Q8_0) {
+                if (t->name[0] == 'a') {
+                    for (int64_t r = 0; r < ggml_nrows(t); r++) {
+                        std::vector<uint8_t> data(t->ne[0]);
+                        for (int i = 0; i < t->ne[0]; i++) {
+                            //data[i] = (4 * i + r) / (t->ne[0] + t->ne[1] * 0.5);
+                            //data[i] = std::min(i, 32);
+                            data[i] = 128;
+                        }
+                        ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(uint8_t));
+                    }
+                }
+            } */ else {
                 init_tensor_uniform(t);
             }
         }
     }
+#endif
 };
 
 // GGML_OP_MUL_MAT_ID
@@ -3107,6 +3122,56 @@ struct test_out_prod : public test_case {
 
         return out;
     }
+
+#if 0
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            if (t->type == GGML_TYPE_F32) {
+                if (t->name[0] == 'o') {
+                    for (int64_t r = 0; r < ggml_nrows(t); r++) {
+                        std::vector<float> data(t->ne[0]);
+                        for (int i = 0; i < t->ne[0]; i++) {
+                            data[i] = -13.0f;
+                        }
+                        ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float));
+                    }
+                } else if (t->name[0] == 'a') {
+                    for (int64_t r = 0; r < ggml_nrows(t); r++) {
+                        std::vector<float> data(t->ne[0]);
+                        for (int i = 0; i < t->ne[0]; i++) {
+                            data[i] = (3.50f * i + r) / (t->ne[0] + t->ne[1] * 0.5);
+                        }
+                        ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float));
+                    }
+                } else {
+                    for (int64_t r = 0; r < ggml_nrows(t); r++) {
+                        std::vector<float> data(t->ne[0]);
+                        for (int i = 0; i < t->ne[0]; i++) {
+                            //data[i] = (1.33f * i + r) / (t->ne[0] + t->ne[1] * 1.2);
+                            data[i] = i+1;
+                        }
+                        ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float));
+                    }
+                }
+            } else if (t->type == GGML_TYPE_Q8_0) {
+                if (t->name[0] == 'a') {
+                    for (int64_t r = 0; r < ggml_nrows(t); r++) {
+                        std::vector<uint8_t> data(t->ne[0]);
+                        for (int i = 0; i < t->ne[0]; i++) {
+                            //data[i] = (4 * i + r) / (t->ne[0] + t->ne[1] * 0.5);
+                            //data[i] = std::min(i, 32);
+                            data[i] = 100 + i;
+                        }
+                        ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(uint8_t));
+                    }
+                }
+            } else {
+                init_tensor_uniform(t);
+            }
+        }
+    }
+#endif
+
 };
 
 // GGML_OP_SQR
@@ -4856,7 +4921,8 @@ struct test_falcon : public test_llm {
 // ###########################################
 static const ggml_type all_types[] = {
     GGML_TYPE_F32, GGML_TYPE_F16, // GGML_TYPE_BF16,
-    // GGML_TYPE_Q4_0, GGML_TYPE_Q4_1,
+    GGML_TYPE_Q4_0,
+    // GGML_TYPE_Q4_1,
     // GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
     GGML_TYPE_Q8_0,
     // GGML_TYPE_Q2_K, GGML_TYPE_Q3_K,
@@ -4871,13 +4937,14 @@ static const ggml_type all_types[] = {
 static const ggml_type base_types[] = {
     GGML_TYPE_F32, GGML_TYPE_F16,
     GGML_TYPE_Q8_0, // for I8MM tests
-    // GGML_TYPE_Q4_0,
+    GGML_TYPE_Q4_0,
     // GGML_TYPE_Q4_1, // for I8MM tests
     // GGML_TYPE_Q4_K,
     // GGML_TYPE_IQ2_XXS
 };
 
 static const ggml_type other_types[] = {
+    GGML_TYPE_Q4_0,
     // GGML_TYPE_Q4_1,
     // GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
     GGML_TYPE_Q8_0,
@@ -5231,13 +5298,58 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_gla(GGML_TYPE_F32, 32, 64, 32, 4));
     test_cases.emplace_back(new test_gla(GGML_TYPE_F32, 32, 64, 128, 4));
 
-    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 32, 2, 2, {1, 1}, {1, 1}));
-    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 4096*2, 256, 1024, {1, 1}, {1, 1}));
-    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 4096*20, 256, 1024, {1, 1}, {1, 1}));
-    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 4096*30, 256, 1024, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_out_prod(GGML_TYPE_F32, GGML_TYPE_F32, 4096*20, 256, 1024, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_out_prod(GGML_TYPE_Q8_0, GGML_TYPE_F32, 4096*20, 256, 1024, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_out_prod(GGML_TYPE_F32, GGML_TYPE_F32, 1024, 256, 4096*40, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_out_prod(GGML_TYPE_Q8_0, GGML_TYPE_F32, 1024, 256, 4096*40, {1, 1}, {1, 1}));
 
+#if 0
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 2, 2, 32, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 4, 4, 32, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 2, 2, 32, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 4, 4, 32, {1, 1}, {1, 1}));
 
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 4096*20, 256, 1024, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 4096*20, 256, 1024, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 1024, 256, 4096*20, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 1024, 256, 4096*20, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 1024, 256, 4096*40, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 1024, 256, 4096*40, {1, 1}, {1, 1}));
+#endif
+
 #if 0
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 2, 2, 1024, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 1024, 2, 2, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 2, 1024, 2, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 2, 1, 4096, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 1, 2, 4096, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 2, 2, 4096, {1, 1}, {1, 1}));
+
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 2, 2, 32, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 4, 4, 32, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 4, 4, 64, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 8, 8, 128, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 32, 2, 2, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 64, 16, 2, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 16, 64, 2, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 64, 16, 4, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 16, 64, 4, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 4, 16, 32, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 16, 4, 32, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 4, 16, 64, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 16, 4, 64, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 32, 32, 32, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 32, 64, 64, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 64, 32, 64, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 64, 64, 32, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 64, 64, 64, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 128, 64, 2, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 256, 128, 32, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 256, 128, 64, {1, 1}, {1, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 512, 256, 1024, {1, 1}, {1, 1}));
+#endif
+
+#if 1
     for (ggml_type type_a : all_types) {
         for (int i = 1; i < 10; ++i) {
             test_cases.emplace_back(new test_mul_mat(type_a, GGML_TYPE_F32, 16, i, 256, {1, 1}, {1, 1}));
@@ -5329,12 +5441,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056, 1, 193, {1, 1}, {4, 1}, {0, 2, 1, 3}));
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056, 1, 67, {1, 1}, {4, 1}, {0, 2, 1, 3}));
 
-    // XXX
-    //test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 151936, 256, 1024, {1, 1}, {1, 1}));
-    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 128, 256, 1024, {1, 1}, {1, 1}));
-    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 4096*8, 256, 1024, {1, 1}, {1, 1}));
-    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 4096*16, 256, 1024, {1, 1}, {1, 1}));
-
     for (auto bs : {1,2,4,8}) {
         for (auto nr : {1,4}) {
             for (uint32_t m = 0; m < 2; ++m) {