@@ -2586,7 +2586,6 @@ struct ggml_compute_params {
 
     // work buffer for all threads
     size_t wsize;
-    size_t qsize;
     void * wdata;
 
     struct ggml_compute_state_shared * shared;
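
In short, the dedicated quantization buffer is removed from the per-thread compute parameters: quantized activations now live directly in the shared work buffer. A sketch of the resulting field set (reconstructed from this hunk and the initializer in ggml_graph_compute_thread further down; unrelated fields omitted, so this is not the literal definition):

// Sketch only, not the verbatim ggml definition.
struct ggml_compute_params {
    int ith;                                   // index of the executing thread
    int nth;                                   // total number of threads
    size_t wsize;                              // size of the shared work buffer
    void * wdata;                              // work buffer; quantized src1 rows are written here
    struct ggml_compute_state_shared * shared; // shared state, used by ggml_barrier()
};
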
@@ -13940,7 +13939,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
         return;
     }
 
-    const void * wdata = (src1->type == vec_dot_type) ? src1->data : (char *)params->wdata + params->wsize - params->qsize + GGML_MAX_NAME;
+    const void * wdata = (src1->type == vec_dot_type) ? src1->data : (char *)params->wdata;
     const size_t row_size = ggml_row_size(vec_dot_type, ne10);
 
     assert(ne12 % ne02 == 0);
@@ -14110,12 +14109,7 @@ UseGgmlGemm1:;
 #endif
 
     if (src1->type != vec_dot_type) {
-        char * wdata = (char *)params->wdata + params->wsize - params->qsize;
-
-        if (strncmp(src1->name, wdata - GGML_MAX_NAME, GGML_MAX_NAME) == 0) {
-            goto AlreadyQuantized;
-        }
-        wdata += GGML_MAX_NAME;
+        char * wdata = params->wdata;
 
 #if IK_PRINT_TIMING
         int64_t t1 = ggml_time_us();
@@ -14125,7 +14119,7 @@ UseGgmlGemm1:;
         const size_t nbw2 = nbw1*ne11;
         const size_t nbw3 = nbw2*ne12;
 
-        assert(params->qsize >= ne13*nbw3);
+        assert(params->wsize >= ne13*nbw3);
         GGML_ASSERT(src1->type == GGML_TYPE_F32);
 
         for (int64_t i13 = 0; i13 < ne13; ++i13) {
@@ -14157,17 +14151,14 @@ UseGgmlGemm1:;
 #endif
 
         if (ith == 0) {
-            wdata -= GGML_MAX_NAME;
-            memcpy(wdata, src1->name, GGML_MAX_NAME);
             // Every thread starts at ith, so the first unprocessed chunk is nth. This save a bit of coordination right at the start.
             //atomic_store(&params->shared->current_chunk, nth);
         }
 
-AlreadyQuantized:;
+        ggml_barrier(params->shared);
     }
 
-    const void * wdata = (src1->type == vec_dot_type) ? src1->data
-                       : (const void *)((const char *)params->wdata + params->wsize - params->qsize + GGML_MAX_NAME);
+    const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
 
 #if GGML_USE_IQK_MULMAT
     if (src1->type != vec_dot_type && dst->type == GGML_TYPE_F32) {
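
The behavioral change in the hunks above: previously the quantized copy of src1 lived in a separate qsize region at the tail of the work buffer, tagged with the tensor name so a repeated call could jump to AlreadyQuantized and skip re-quantization; now each call quantizes src1 into the front of params->wdata and all threads meet at ggml_barrier() before any of them reads the quantized data. The same quantize-then-barrier pattern in a self-contained sketch (hypothetical names, plain POSIX threads instead of ggml's thread pool; compile with -pthread):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define N_THREADS 4
#define N_ROWS    16
#define ROW_SIZE  32   // bytes per "quantized" row, stand-in for ggml_row_size()

static pthread_barrier_t barrier;               // stand-in for ggml_barrier()
static unsigned char wdata[N_ROWS * ROW_SIZE];  // stand-in for params->wdata

static void quantize_row(int row, unsigned char * dst) {
    for (int i = 0; i < ROW_SIZE; ++i) dst[i] = (unsigned char) row;  // fake from_float()
}

static void * worker(void * arg) {
    const int ith = (int)(intptr_t) arg;

    // phase 1: like the i11 loop above, each thread converts a strided
    // subset of src1 rows into the shared work buffer
    for (int row = ith; row < N_ROWS; row += N_THREADS) {
        quantize_row(row, wdata + (size_t) row * ROW_SIZE);
    }

    // phase 2: nobody may read wdata until every writer is done --
    // this is the role of ggml_barrier(params->shared) in the patch
    pthread_barrier_wait(&barrier);

    // phase 3: any thread may now read any row for its mat-mul chunk
    size_t sum = 0;
    for (int row = 0; row < N_ROWS; ++row) sum += wdata[(size_t) row * ROW_SIZE];
    printf("thread %d sees row sum %zu\n", ith, sum);
    return NULL;
}

int main(void) {
    pthread_t t[N_THREADS];
    pthread_barrier_init(&barrier, NULL, N_THREADS);
    for (int i = 0; i < N_THREADS; ++i) pthread_create(&t[i], NULL, worker, (void *)(intptr_t) i);
    for (int i = 0; i < N_THREADS; ++i) pthread_join(t[i], NULL);
    pthread_barrier_destroy(&barrier);
    return 0;
}

The trade-off is that the quantized activations are recomputed on every call, but the work buffer no longer needs the GGML_MAX_NAME name tag, and the stale-cache problem the tag was guarding against disappears.
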
@@ -14354,10 +14345,9 @@ static void ggml_compute_forward_mul_mat_id(
     const int n_ids = ids->ne[0]; // n_expert_used
     const int n_as = ne02; // n_expert
 
-    char * qdata = (char *)params->wdata + params->wsize - params->qsize;
-
-    char * wdata_src1_end = (src1->type == vec_dot_type) ? qdata :
-        qdata + GGML_PAD(GGML_MAX_NAME + ggml_row_size(vec_dot_type, ggml_nelements(src1)), sizeof(int64_t));
+    char * wdata_src1_end = (src1->type == vec_dot_type) ?
+            (char *) params->wdata :
+            (char *) params->wdata + GGML_PAD(ggml_row_size(vec_dot_type, ggml_nelements(src1)), sizeof(int64_t));
 
     struct mmid_row_mapping {
         int32_t i1;
@@ -14367,19 +14357,14 @@ static void ggml_compute_forward_mul_mat_id(
     int64_t * matrix_row_counts = (int64_t *) (wdata_src1_end); // [n_as]
     struct mmid_row_mapping * matrix_rows = (struct mmid_row_mapping *)(matrix_row_counts + n_as); // [n_as][ne11]
 
-    bool store_name = false;
     if (src1->type != vec_dot_type) {
-        if (strncmp(src1->name, qdata, GGML_MAX_NAME) == 0) {
-            goto QuantizationAlreadyDone;
-        }
-        store_name = true;
-        char * wdata = qdata + GGML_MAX_NAME;
+        char * wdata = params->wdata;
 
         const size_t nbw1 = ggml_row_size(vec_dot_type, ne10);
         const size_t nbw2 = nbw1*ne11;
         const size_t nbw3 = nbw2*ne12;
 
-        assert(params->qsize >= ne13*nbw3);
+        assert(params->wsize >= ne13*nbw3);
         GGML_ASSERT(src1->type == GGML_TYPE_F32);
 
         for (int64_t i13 = 0; i13 < ne13; ++i13) {
@@ -14395,12 +14380,7 @@ static void ggml_compute_forward_mul_mat_id(
 
 #define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne12 + (i1)]
 
-QuantizationAlreadyDone:;
     if (ith == 0) {
-        if (store_name) {
-            memcpy(qdata, src1->name, GGML_MAX_NAME);
-        }
-
         // initialize matrix_row_counts
         memset(matrix_row_counts, 0, n_as*sizeof(int64_t));
 
@@ -14429,7 +14409,7 @@ QuantizationAlreadyDone:;
 
         const char * src0_cur = (const char *) src0->data + cur_a*nb02;
 
-        const void * wdata = (src1->type == vec_dot_type) ? src1->data : qdata + GGML_MAX_NAME;
+        const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
         const size_t row_size = ggml_row_size(vec_dot_type, ne10);
 
         const int64_t nr0 = ne01; // src0 rows
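
After the change, mul_mat_id carves one work buffer instead of two: quantized src1 rows (only when src1->type != vec_dot_type) sit at the start, padded to int64_t alignment, followed by matrix_row_counts and the matrix_rows mapping addressed through MMID_MATRIX_ROW. A small sketch of that carving; carve_wdata and quantized_size are illustrative names, not ggml APIs:

#include <stdint.h>
#include <stddef.h>

struct mmid_row_mapping { int32_t i1; int32_t i2; };   // 8 bytes, same size as int64_t

#define PAD_TO(x, n) (((x) + (n) - 1) / (n) * (n))     // stand-in for GGML_PAD

// Lays out the work buffer the way the code above does and returns the total
// bytes used, mirroring the MUL_MAT_ID sizing in ggml_graph_plan further down.
static size_t carve_wdata(char * wdata, size_t quantized_size, int n_as, int ne12,
                          char ** quantized, int64_t ** row_counts,
                          struct mmid_row_mapping ** rows) {
    *quantized  = wdata;                                             // quantized src1 rows
    char * end  = wdata + PAD_TO(quantized_size, sizeof(int64_t));   // wdata_src1_end
    *row_counts = (int64_t *) end;                                   // [n_as]
    *rows       = (struct mmid_row_mapping *)(*row_counts + n_as);   // indexed as rows[e*ne12 + i]
    return (size_t) ((char *) (*rows + (size_t) n_as * ne12) - wdata);
}

Because sizeof(struct mmid_row_mapping) equals sizeof(int64_t), the planner can budget the mapping as n_as * src1->ne[2] * sizeof(int64_t), as in the GGML_OP_MUL_MAT_ID case below.
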
@@ -21017,7 +20997,6 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
     }
 
     size_t work_size = 0;
-    size_t q_size = 0;
 
     struct ggml_cplan cplan;
     memset(&cplan, 0, sizeof(struct ggml_cplan));
@@ -21033,7 +21012,6 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
         max_tasks = MAX(max_tasks, n_tasks);
 
         size_t cur = 0;
-        size_t cur_q = 0;
 
         switch (node->op) {
             case GGML_OP_CPY:
@@ -21064,8 +21042,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
                     const enum ggml_type vec_dot_type = type_traits[node->src[0]->type].vec_dot_type;
 
                     if (node->src[1]->type != vec_dot_type) {
-                        cur_q = ggml_row_size(vec_dot_type, node->src[1]->ne[0]) * ggml_nrows(node->src[1]);
-                        //cur_q = ggml_row_size(vec_dot_type, ggml_nelements(node->src[1]));
+                        cur = ggml_row_size(vec_dot_type, node->src[1]->ne[0]) * ggml_nrows(node->src[1]);
                     }
                 } break;
             case GGML_OP_MUL_MAT_ID:
@@ -21075,13 +21052,12 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
                     const struct ggml_tensor * src1 = node->src[1];
                     const enum ggml_type vec_dot_type = type_traits[src0->type].vec_dot_type;
                     if (src1->type != vec_dot_type) {
-                        cur_q += ggml_row_size(vec_dot_type, node->src[1]->ne[0]) * ggml_nrows(node->src[1]);
-                        //cur_q += ggml_row_size(vec_dot_type, ggml_nelements(src1));
+                        cur += ggml_row_size(vec_dot_type, node->src[1]->ne[0]) * ggml_nrows(node->src[1]);
                     }
                     const int n_as = src0->ne[2];
-                    cur_q += GGML_PAD(cur, sizeof(int64_t)); // align
-                    cur_q += n_as * sizeof(int64_t); // matrix_row_counts
-                    cur_q += n_as * src1->ne[2] * sizeof(int64_t); // matrix_rows
+                    cur += GGML_PAD(cur, sizeof(int64_t)); // align
+                    cur += n_as * sizeof(int64_t); // matrix_row_counts
+                    cur += n_as * src1->ne[2] * sizeof(int64_t); // matrix_rows
                 } break;
             case GGML_OP_OUT_PROD:
                 {
@@ -21170,20 +21146,14 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
         }
 
         work_size = MAX(work_size, cur);
-        q_size = MAX(q_size, cur_q);
     }
 
     if (work_size > 0) {
         work_size += CACHE_LINE_SIZE*(n_threads - 1);
     }
-    if (q_size > 0) {
-        q_size += GGML_MAX_NAME;
-    }
-    work_size += q_size;
 
     cplan.n_threads = MIN(max_tasks, n_threads);
     cplan.work_size = work_size;
-    cplan.q_size = q_size;
     cplan.work_data = NULL;
 
     return cplan;
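
With q_size gone, the quantized-activation estimate flows into cur and therefore into work_size like any other per-node scratch need: the planner keeps the maximum cur over all nodes and then adds a cache line of stagger per extra thread. A back-of-the-envelope illustration for a single MUL_MAT node (all numbers made up):

#include <stdio.h>
#include <stddef.h>

#define CACHE_LINE_SIZE 64   // assumed value, for illustration

int main(void) {
    size_t row_size  = 34;   // pretend ggml_row_size(vec_dot_type, node->src[1]->ne[0]) in bytes
    size_t nrows     = 512;  // pretend ggml_nrows(node->src[1])
    int    n_threads = 8;

    size_t cur       = row_size * nrows;  // quantized src1 now counted in `cur`
    size_t work_size = cur;               // MAX over all graph nodes in the real code

    if (work_size > 0) {
        work_size += CACHE_LINE_SIZE * (size_t) (n_threads - 1);  // per-thread stagger
    }

    printf("work_size = %zu bytes\n", work_size);  // 17408 + 448 = 17856
    return 0;
}
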
@@ -21201,7 +21171,6 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             /*.ith =*/ state->ith,
             /*.nth =*/ state->shared->n_threads,
             /*.wsize =*/ cplan->work_size,
-            /*.qsize =*/ cplan->q_size,
             /*.wdata =*/ cplan->work_data,
             /*.shared=*/ state->shared,
         };