@@ -250,13 +250,6 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
     const char * data = is_host ? (const char *) src1->data : m_src1_data.data();
     GGML_ASSERT(src1->nb[0] == ggml_element_size(src1));
 
-    // TODO: 4d? (is that even used in practice?)
-    // the extra dimension would need to be stored somewhere to be reflected in the imatrix file
-    if (ggml_nrows(src1) != src1->ne[1] * src1->ne[2]) {
-        LOG_ERR("%s: tensor has more than 3 dimensions: %s", __func__, wname.c_str());
-        GGML_ASSERT(false);
-    }
-
     // this has been adapted to the new format of storing merged experts in a single 3d tensor
     // ref: https://github.com/ggml-org/llama.cpp/pull/6387
     if (t->op == GGML_OP_MUL_MAT_ID) {
@@ -272,6 +265,12 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
 
         GGML_ASSERT(ids->ne[1] == src1->ne[2]);
 
+        // the extra dimension would need to be stored somewhere to be reflected in the imatrix file
+        if (ggml_nrows(src1) != src1->ne[1] * src1->ne[2]) {
+            LOG_ERR("%s: tensor has more than 3 dimensions: %s", __func__, wname.c_str());
+            GGML_ASSERT(false);
+        }
+
         m_ids.resize(ggml_nbytes(ids));
         ggml_backend_tensor_get(ids, m_ids.data(), 0, ggml_nbytes(ids));
 
@@ -335,29 +334,40 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         }
     } else {
         auto & e = m_stats[wname];
-        const int64_t n_mat = src1->ne[2] * src1->ne[3];
-
+        const int64_t n_mat = src0->ne[2] * src0->ne[3];
+
+        // use a single count per dense tensor
+        // (necessary when merging older GGUF-imatrix files with 3d tensors)
+        if (e.counts.size() > 1) {
+            bool all_equal = true;
+            for (size_t i = 1; i < e.counts.size(); ++i) {
+                if (e.counts[0] != e.counts[i]) {
+                    all_equal = false;
+                    break;
+                }
+            }
+            if (all_equal) {
+                e.counts.resize(1);
+            }
+        }
         if (e.values.empty()) {
             e.values.resize(src1->ne[0] * n_mat, 0);
-            e.counts.resize(n_mat, 0);
+            e.counts.resize(1, 0);
         }
         else if (e.values.size() != (size_t)(src1->ne[0] * n_mat)) {
             LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.values.size(), (int)(src1->ne[0] * n_mat));
             exit(1); // GGML_ABORT("fatal error");
         }
-        else if (e.counts.size() != (size_t)n_mat) {
-            LOG_ERR("%s: inconsistent expert count for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.counts.size(), (int)n_mat);
-            exit(1); // GGML_ABORT("fatal error");
-        }
         LOG_DBGV(2, "%s[%d]: %32s, %s, %5d x %5d x %5d, %d\n", __func__, m_last_chunk, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[1], (int)src1->ne[2], (int)src1->type);
+
         for (int64_t i3 = 0; i3 < src1->ne[3]; ++i3) {
             for (int64_t i2 = 0; i2 < src1->ne[2]; ++i2) {
-                const int64_t mat_id = i3 * src1->ne[2] + i2;
+                // handle 3D+ tensors, but flatten 3D+ activations when model tensor is 2D
+                const int64_t mat_id = (i3 % src0->ne[3]) * src0->ne[2] + (i2 % src0->ne[2]);
                 const int64_t mat_start = mat_id * src1->ne[0];
 
                 for (int64_t row = 0; row < src1->ne[1]; ++row) {
-                    const float * x = (const float *) (data + row * src1->nb[1] + i2 * src1->nb[2] + i3 * src1->ne[3]);
-                    e.counts[mat_id]++;
+                    const float * x = (const float *) (data + row * src1->nb[1] + i2 * src1->nb[2] + i3 * src1->nb[3]);
                     for (int64_t j = 0; j < src1->ne[0]; ++j) {
                         e.values[mat_start + j] += x[j] * x[j];
                         if (!std::isfinite((float)e.values[j])) {
@@ -366,16 +376,20 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
                         }
                     }
                 }
-                const int32_t n_chunk = e.counts[mat_id] / chunk_size;
-                if (n_chunk > m_last_chunk) {
-                    const int32_t chunk_step = n_chunk - m_last_chunk;
-                    m_last_chunk = n_chunk;
-                    if ((m_last_chunk % m_params.n_out_freq) / chunk_step == 0) {
-                        save_imatrix();
-                    }
-                    if (m_params.n_save_freq > 0 && (m_last_chunk % m_params.n_save_freq) / chunk_step == 0) {
-                        save_imatrix(m_last_chunk);
-                    }
+            }
+        }
+        // only 1 count in practice, except when a tensor is used for both MUL_MAT_ID and MUL_MAT
+        for (size_t i = 0; i < e.counts.size(); ++i) {
+            e.counts[i] += ggml_nrows(src1) / n_mat;
+            const int32_t n_chunk = e.counts[i] / chunk_size;
+            if (n_chunk > m_last_chunk) {
+                const int32_t chunk_step = n_chunk - m_last_chunk;
+                m_last_chunk = n_chunk;
+                if ((m_last_chunk % m_params.n_out_freq) / chunk_step == 0) {
+                    save_imatrix();
+                }
+                if (m_params.n_save_freq > 0 && (m_last_chunk % m_params.n_save_freq) / chunk_step == 0) {
+                    save_imatrix(m_last_chunk);
                 }
             }
         }
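
For readers skimming the diff, here is a minimal standalone sketch of the new flattened-matrix indexing. It is not part of the commit: toy_tensor, rows_per_mat, and the dimension values are illustrative assumptions; only the ne[] fields and the mat_id expression mirror the patch.

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative stand-in for the ggml tensor fields used by the patch:
// ne[] holds the size of each dimension, as in ggml.
struct toy_tensor {
    int64_t ne[4];
};

int main() {
    // src0 is a 2D model weight (ne[2] == ne[3] == 1), src1 a 3D+ batch of
    // activations; the patch wraps the extra activation dimensions onto the
    // (possibly smaller) set of weight matrices.
    const toy_tensor src0 = {{8, 8, 1, 1}};
    const toy_tensor src1 = {{8, 4, 3, 2}};

    const int64_t n_mat = src0.ne[2] * src0.ne[3]; // matrices tracked per tensor
    std::vector<int64_t> rows_per_mat(n_mat, 0);

    for (int64_t i3 = 0; i3 < src1.ne[3]; ++i3) {
        for (int64_t i2 = 0; i2 < src1.ne[2]; ++i2) {
            // same expression as the patch: a 2D weight gets mat_id == 0 for
            // every activation slice; a true 3D weight keeps one id per slice
            const int64_t mat_id = (i3 % src0.ne[3]) * src0.ne[2] + (i2 % src0.ne[2]);
            rows_per_mat[mat_id] += src1.ne[1];
        }
    }

    // all 4*3*2 = 24 activation rows land on the single weight matrix,
    // matching e.counts[i] += ggml_nrows(src1) / n_mat in the patch
    for (int64_t m = 0; m < n_mat; ++m) {
        printf("mat %d: %d rows\n", (int) m, (int) rows_per_mat[m]);
    }
    return 0;
}

With a 2D weight every slice maps to mat_id == 0, so all ggml_nrows(src1) rows accumulate into one matrix, which is why the patch can replace the per-row e.counts[mat_id]++ with a single bump of ggml_nrows(src1) / n_mat per count.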
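Likewise, a toy walkthrough of the relocated chunk-accounting test (assumed values, not from the commit): because one call can advance the chunk counter by more than one, (m_last_chunk % n_out_freq) / chunk_step == 0 fires whenever a multiple of n_out_freq is reached or crossed during that step.

#include <cstdio>
#include <initializer_list>

int main() {
    const int n_out_freq = 10; // assumed save frequency, in chunks
    int last_chunk = 0;

    // hypothetical sequence of chunk counters after successive batches
    for (int new_chunk : {3, 7, 12, 19, 21, 30}) {
        const int chunk_step = new_chunk - last_chunk;
        last_chunk = new_chunk;
        // equivalent to: last_chunk % n_out_freq < chunk_step,
        // i.e. a multiple of n_out_freq falls within the last step
        if ((last_chunk % n_out_freq) / chunk_step == 0) {
            printf("chunk %2d: save\n", last_chunk);
        } else {
            printf("chunk %2d: skip\n", last_chunk);
        }
    }
    return 0;
}

This prints "save" at chunks 12, 21, and 30 (the steps that reach or cross 10, 20, and 30) and "skip" elsewhere.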