@@ -1373,7 +1373,6 @@ struct ggml_compute_params {
13731373
13741374 // work buffer for all threads
13751375 size_t wsize ;
1376- size_t qsize ;
13771376 void * wdata ;
13781377
13791378 struct ggml_threadpool * threadpool ;
@@ -7598,18 +7597,13 @@ UseGgmlGemm1:;
75987597#endif
75997598
76007599 if (src1 -> type != vec_dot_type ) {
7601- char * wdata = (char * )params -> wdata + params -> wsize - params -> qsize ;
7602-
7603- if (strncmp (src1 -> name , wdata - GGML_MAX_NAME , GGML_MAX_NAME ) == 0 ) {
7604- goto AlreadyQunatized ;
7605- }
7606- wdata += GGML_MAX_NAME ;
7600+ char * wdata = params -> wdata ;
76077601
76087602 const size_t nbw1 = ggml_row_size (vec_dot_type , ne10 );
76097603 const size_t nbw2 = nbw1 * ne11 ;
76107604 const size_t nbw3 = nbw2 * ne12 ;
76117605
7612- assert (params -> qsize >= ne13 * nbw3 );
7606+ assert (params -> wsize >= ne13 * nbw3 );
76137607 GGML_ASSERT (src1 -> type == GGML_TYPE_F32 );
76147608
76157609 for (int64_t i13 = 0 ; i13 < ne13 ; ++ i13 ) {
@@ -7630,21 +7624,14 @@ UseGgmlGemm1:;
76307624 }
76317625 }
76327626 }
7627+ }
76337628
7634- ggml_barrier (params -> threadpool );
7635-
7636- if (ith == 0 ) {
7637- wdata -= GGML_MAX_NAME ;
7638- memcpy (wdata , src1 -> name , GGML_MAX_NAME );
7639- // Every thread starts at ith, so the first unprocessed chunk is nth. This save a bit of coordination right at the start.
7640- atomic_store_explicit (& params -> threadpool -> current_chunk , nth , memory_order_relaxed );
7641- }
7642-
7643- AlreadyQunatized :;
7629+ if (ith == 0 ) {
7630+ // Every thread starts at ith, so the first unprocessed chunk is nth. This saves a bit of coordination right at the start.
7631+ atomic_store_explicit (& params -> threadpool -> current_chunk , nth , memory_order_relaxed );
76447632 }
76457633
7646- const void * wdata = (src1 -> type == vec_dot_type ) ? src1 -> data
7647- : (const void * )((const char * )params -> wdata + params -> wsize - params -> qsize + GGML_MAX_NAME );
7634+ ggml_barrier (params -> threadpool );
76487635
76497636#if GGML_USE_LLAMAFILE
76507637 if (src1 -> type != vec_dot_type ) {
@@ -7795,10 +7782,9 @@ static void ggml_compute_forward_mul_mat_id(
77957782 const int n_ids = ids -> ne [0 ]; // n_expert_used
77967783 const int n_as = ne02 ; // n_expert
77977784
7798- char * qdata = (char * )params -> wdata + params -> wsize - params -> qsize ;
7799-
7800- char * wdata_src1_end = (src1 -> type == vec_dot_type ) ? qdata :
7801- qdata + GGML_PAD (GGML_MAX_NAME + ggml_row_size (vec_dot_type , ggml_nelements (src1 )), sizeof (int64_t ));
7785+ char * wdata_src1_end = (src1 -> type == vec_dot_type ) ?
7786+ (char * ) params -> wdata :
7787+ (char * ) params -> wdata + GGML_PAD (ggml_row_size (vec_dot_type , ggml_nelements (src1 )), sizeof (int64_t ));
78027788
78037789 struct mmid_row_mapping {
78047790 int32_t i1 ;
@@ -7808,19 +7794,14 @@ static void ggml_compute_forward_mul_mat_id(
78087794 int64_t * matrix_row_counts = (int64_t * ) (wdata_src1_end ); // [n_as]
78097795 struct mmid_row_mapping * matrix_rows = (struct mmid_row_mapping * )(matrix_row_counts + n_as ); // [n_as][ne11]
78107796
7811- bool store_name = false;
78127797 if (src1 -> type != vec_dot_type ) {
7813- if (strncmp (src1 -> name , qdata , GGML_MAX_NAME ) == 0 ) {
7814- goto QuantizationAlreadyDone ;
7815- }
7816- store_name = true;
7817- char * wdata = qdata + GGML_MAX_NAME ;
7798+ char * wdata = params -> wdata ;
78187799
78197800 const size_t nbw1 = ggml_row_size (vec_dot_type , ne10 );
78207801 const size_t nbw2 = nbw1 * ne11 ;
78217802 const size_t nbw3 = nbw2 * ne12 ;
78227803
7823- assert (params -> qsize >= ne13 * nbw3 );
7804+ assert (params -> wsize >= ne13 * nbw3 );
78247805 GGML_ASSERT (src1 -> type == GGML_TYPE_F32 );
78257806
78267807 for (int64_t i13 = 0 ; i13 < ne13 ; ++ i13 ) {
@@ -7836,12 +7817,7 @@ static void ggml_compute_forward_mul_mat_id(
78367817
78377818#define MMID_MATRIX_ROW (row_id , i1 ) matrix_rows[(row_id)*ne12 + (i1)]
78387819
7839- QuantizationAlreadyDone :;
78407820 if (ith == 0 ) {
7841- if (store_name ) {
7842- memcpy (qdata , src1 -> name , GGML_MAX_NAME );
7843- }
7844-
78457821 // initialize matrix_row_counts
78467822 memset (matrix_row_counts , 0 , n_as * sizeof (int64_t ));
78477823
@@ -7870,7 +7846,7 @@ QuantizationAlreadyDone:;
78707846
78717847 const char * src0_cur = (const char * ) src0 -> data + cur_a * nb02 ;
78727848
7873- const void * wdata = (src1 -> type == vec_dot_type ) ? src1 -> data : qdata + GGML_MAX_NAME ;
7849+ const void * wdata = (src1 -> type == vec_dot_type ) ? src1 -> data : params -> wdata ;
78747850 const size_t row_size = ggml_row_size (vec_dot_type , ne10 );
78757851
78767852 const int64_t nr0 = ne01 ; // src0 rows
@@ -13369,7 +13345,6 @@ struct ggml_cplan ggml_graph_plan(
1336913345 }
1337013346
1337113347 size_t work_size = 0 ;
13372- size_t q_size = 0 ;
1337313348
1337413349 struct ggml_cplan cplan ;
1337513350 memset (& cplan , 0 , sizeof (struct ggml_cplan ));
@@ -13385,7 +13360,6 @@ struct ggml_cplan ggml_graph_plan(
1338513360 max_tasks = MAX (max_tasks , n_tasks );
1338613361
1338713362 size_t cur = 0 ;
13388- size_t cur_q = 0 ;
1338913363
1339013364 switch (node -> op ) {
1339113365 case GGML_OP_CPY :
@@ -13425,7 +13399,7 @@ struct ggml_cplan ggml_graph_plan(
1342513399 } else
1342613400#endif
1342713401 if (node -> src [1 ]-> type != vec_dot_type ) {
13428- cur_q = ggml_row_size (vec_dot_type , ggml_nelements (node -> src [1 ]));
13402+ cur = ggml_row_size (vec_dot_type , ggml_nelements (node -> src [1 ]));
1342913403 }
1343013404 } break ;
1343113405 case GGML_OP_MUL_MAT_ID :
@@ -13435,12 +13409,12 @@ struct ggml_cplan ggml_graph_plan(
1343513409 const struct ggml_tensor * src1 = node -> src [1 ];
1343613410 const enum ggml_type vec_dot_type = type_traits_cpu [src0 -> type ].vec_dot_type ;
1343713411 if (src1 -> type != vec_dot_type ) {
13438- cur_q += ggml_row_size (vec_dot_type , ggml_nelements (src1 ));
13412+ cur += ggml_row_size (vec_dot_type , ggml_nelements (src1 ));
1343913413 }
1344013414 const int n_as = src0 -> ne [2 ];
13441- cur_q += GGML_PAD (cur , sizeof (int64_t )); // align
13442- cur_q += n_as * sizeof (int64_t ); // matrix_row_counts
13443- cur_q += n_as * src1 -> ne [2 ] * sizeof (int64_t ); // matrix_rows
13415+ cur += GGML_PAD (cur , sizeof (int64_t )); // align
13416+ cur += n_as * sizeof (int64_t ); // matrix_row_counts
13417+ cur += n_as * src1 -> ne [2 ] * sizeof (int64_t ); // matrix_rows
1344413418 } break ;
1344513419 case GGML_OP_OUT_PROD :
1344613420 {
@@ -13529,21 +13503,15 @@ struct ggml_cplan ggml_graph_plan(
1352913503 }
1353013504
1353113505 work_size = MAX (work_size , cur );
13532- q_size = MAX (q_size , cur_q );
1353313506 }
1353413507
1353513508 if (work_size > 0 ) {
1353613509 work_size += CACHE_LINE_SIZE * (n_threads );
1353713510 }
13538- if (q_size > 0 ) {
13539- q_size += GGML_MAX_NAME ;
13540- }
13541- work_size += q_size ;
1354213511
1354313512 cplan .threadpool = threadpool ;
1354413513 cplan .n_threads = MIN (max_tasks , n_threads );
1354513514 cplan .work_size = work_size ;
13546- cplan .q_size = q_size ;
1354713515 cplan .work_data = NULL ;
1354813516
1354913517 return cplan ;
@@ -13562,7 +13530,6 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
1356213530 /*.ith =*/ state -> ith ,
1356313531 /*.nth =*/ atomic_load_explicit (& tp -> n_threads_cur , memory_order_relaxed ),
1356413532 /*.wsize =*/ cplan -> work_size ,
13565- /*.qsize =*/ cplan -> q_size ,
1356613533 /*.wdata =*/ cplan -> work_data ,
1356713534 /*.threadpool=*/ tp ,
1356813535 };
0 commit comments