@@ -32,15 +32,20 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
3232}
3333
3434static  float  tensor_sum_elements (const  ggml_tensor * tensor) {
35-     double  sum = 0 ;
35+     double  sum1 = 0;
36+     // printf("sum inside = %f\n", sum1);
3637    if  (tensor->type  == GGML_TYPE_F32) {
3738        for  (int  j = 0 ; j < tensor->ne [1 ]; j++) {
3839            for  (int  k = 0 ; k < tensor->ne [0 ]; k++) {
39-                 sum += ((float  *) tensor->data )[j*tensor->ne [0 ] + k];
40+ 			// printf("sum inside = %f\n", sum1);
41+                 printf(" %f \t ", ((float *) tensor->data)[j*tensor->ne[0] + k]);
42+ 		    sum1 = sum1 + ((float  *) tensor->data )[j*tensor->ne [0 ] + k];
43+ 		    // printf("sum inside = %f\n", sum1);
4044            }
45+ 	    printf("\n");
4146        }
4247    }
43-     return  sum ;
48+     return  sum1 ;
4449}
4550
4651static  void  tensor_dump (const  ggml_tensor * tensor, const  char  * name) {
@@ -109,9 +114,14 @@ int main(int argc, char ** argv)  {
109114
110115#undef  VERBOSE_DEBUGGING
111116#ifndef  VERBOSE_DEBUGGING
117+     /* 
112118    const int sizey = 4096; 
113119    const int sizex = 11008; 
114120    const int sizez = 128; 
121+     */  
122+     const  int  sizey = 40 ;
123+     const  int  sizex = 32 *128 ;
124+     const  int  sizez = 2 ;
115125#else 
116126    /*  Working - let's increase size */ 
117127    const  int  sizey = 1 ;
@@ -126,13 +136,14 @@ int main(int argc, char ** argv)  {
126136    // printf("Memsize required = %i\n", sizex*sizex);
127137
128138    //  TODO: perform the bench for all types or for a user specified type
129-     const  ggml_type qtype = GGML_TYPE_Q4_1 ;
139+     const  ggml_type qtype = GGML_TYPE_Q8_0 ;
130140
131141    size_t  ctx_size = 0 ;
132142    ctx_size += ggml_row_size (GGML_TYPE_F32, sizex*sizey);
133143    ctx_size += ggml_row_size (GGML_TYPE_F32, sizex*sizey);
134144    ctx_size += ggml_row_size (GGML_TYPE_F32, sizex*sizez);
135145    ctx_size += ggml_row_size (qtype,         sizex*sizey);
146+     ctx_size += ggml_row_size (qtype,         sizex*sizez);
136147    ctx_size += ggml_row_size (qtype,         sizex*sizey);
137148    ctx_size += ggml_row_size (GGML_TYPE_F32, sizex*sizey); //  BLAS
138149    ctx_size += ggml_row_size (GGML_TYPE_F32, sizex*sizey); //  BLAS
@@ -156,15 +167,15 @@ int main(int argc, char ** argv)  {
156167    printf("Creating new tensors\n");
157168    //  printf("Creating new tensor m1\n");
158169    struct  ggml_tensor  * m11 = ggml_new_tensor_2d (ctx, GGML_TYPE_F32, sizex, sizey);
159-     ggml_set_f32(m11, 1.0f);
170+     ggml_set_f32(m11, -1.23f);
160171
161172    //  printf("Creating new tensor m1\n");
162173    struct  ggml_tensor  * m12 = ggml_new_tensor_2d (ctx, GGML_TYPE_F32, sizex, sizey);
163174    ggml_set_f32(m12, 1.5f);
164175
165176    //  printf("Creating new tensor m2\n");
166177    struct  ggml_tensor  * m2 = ggml_new_tensor_2d (ctx, GGML_TYPE_F32, sizex, sizez);
167-     ggml_set_f32(m2, 2.0f);
178+     ggml_set_f32(m2, -12.23f);
168179
169180    printf("\n------ Test 1 - Matrix Mult via F32 code\n");
170181    //  printf("Creating new tensor m11xm2\n");
@@ -176,27 +187,34 @@ int main(int argc, char ** argv)  {
176187
177188    printf("n_threads=%i\n", benchmark_params.n_threads);
178189
179-     TENSOR_DUMP (m11);
180-     TENSOR_DUMP (m2);
190+     // TENSOR_DUMP(m11);
191+     // TENSOR_DUMP(m2);
181192
182193    std::vector<uint8_t > work_buffer;
183194
184195    ggml_graph_compute_helper (work_buffer, gf, benchmark_params.n_threads );
185196
186-     TENSOR_DUMP (ggml_graph_node (gf, 0 ));
197+     // TENSOR_DUMP(ggml_graph_node(gf, 0));
187198
188199    printf("\n------ Test 2 - Matrix Mult via %s code\n", ggml_type_name(qtype));
189200
190201    int32_t  nelements = sizex*sizey;
202+     int32_t  nelements2 = sizex*sizez;
191203
192204    //  Set up a the benchmark matrices
193205    //  printf("Creating new tensor q11 & Running quantize\n");
194206    struct  ggml_tensor  * q11 = ggml_new_tensor_2d (ctx, qtype, sizex, sizey);
195207    ggml_quantize_chunk (qtype, (const  float  *) m11->data , q11->data , 0 , nelements/m11->ne [0 ], m11->ne [0 ], nullptr );
208+     // TENSOR_DUMP(q11);
196209
210+     //  printf("Creating new tensor q2 & Running quantize\n");
211+     struct  ggml_tensor  * q2 = ggml_new_tensor_2d (ctx, qtype, sizex, sizez);
212+     ggml_quantize_chunk (qtype, (const  float  *) m2->data , q2->data , 0 , nelements2/m2->ne [0 ], m2->ne [0 ], nullptr );
213+     // TENSOR_DUMP(q2);
214+      
197215    //  Set up a the compute graph
198216    //  printf("Creating new tensor q31\n");
199-     struct  ggml_tensor  * q31 = ggml_mul_mat (ctx, q11, m2 );
217+     struct  ggml_tensor  * q31 = ggml_mul_mat (ctx, q11, q2 );
200218
201219    //  printf("Creating compute graph\n");
202220    struct  ggml_cgraph  * gf31 = ggml_new_graph (ctx);
@@ -235,6 +253,7 @@ int main(int argc, char ** argv)  {
235253        long  long  int  start = ggml_time_us ();
236254        // printf("Running ggml_graph_compute\n");
237255        ggml_graph_compute_helper (work_buffer, gf31, benchmark_params.n_threads );
256+         TENSOR_DUMP (ggml_graph_node (gf31, 0 ));
238257
239258        long  long  int  stop = ggml_time_us ();
240259        long  long  int  usec = stop-start;
@@ -247,7 +266,7 @@ int main(int argc, char ** argv)  {
247266            usec,gflops);
248267
249268#ifdef  VERBOSE_DEBUGGING
250-         TENSOR_DUMP("res",gf31.nodes[0])
269+         // TENSOR_DUMP("res",gf31.nodes[0])
251270#endif 
252271
253272        //  Check that the matrix multiplication result is in the right ballpark
0 commit comments