@@ -4,6 +4,7 @@

 #include <climits>
 #include <cstdarg>
+#include <cinttypes>
 #include <string>
 #include <map>
 #include <sstream>
@@ -44,7 +45,7 @@
 // tensor name constants
 //

-#define TN_POS_EMBD        "%s.position_embd.weight"
+#define TN_POS_EMBD        "v.position_embd.weight"
 #define TN_CLASS_EMBD      "v.class_embd"
 #define TN_PATCH_EMBD      "v.patch_embd.weight"  // don't rename this tensor with a ".0" postfix, for backward compat
 #define TN_PATCH_EMBD_1    "v.patch_embd.weight.1"
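For context on the TN_POS_EMBD change above: the old value was a printf-style pattern that needed a prefix argument, while the new value is the complete tensor name. A minimal illustration of the difference, using std::snprintf directly; the "v" prefix and the helper that actually expands these patterns are assumptions, not shown in this hunk:

// sketch: the old pattern needed a prefix (assumed to be "v" here),
// the new define is already the full tensor name
#include <cstdio>

int main() {
    char old_name[64];
    std::snprintf(old_name, sizeof(old_name), "%s.position_embd.weight", "v");
    const char * new_name = "v.position_embd.weight";
    std::printf("%s\n%s\n", old_name, new_name); // both print v.position_embd.weight
}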
@@ -110,6 +111,7 @@ enum projector_type {
     PROJECTOR_TYPE_PIXTRAL,
     PROJECTOR_TYPE_QWEN25VL,
     PROJECTOR_TYPE_INTERNVL,
+    PROJECTOR_TYPE_LLAMA4,
     PROJECTOR_TYPE_UNKNOWN,
 };

@@ -125,6 +127,7 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
     { PROJECTOR_TYPE_IDEFICS3,  "idefics3"},
     { PROJECTOR_TYPE_PIXTRAL,   "pixtral"},
     { PROJECTOR_TYPE_INTERNVL,  "internvl"},
+    { PROJECTOR_TYPE_LLAMA4,    "llama4"},
 };

 static projector_type clip_projector_type_from_string(const std::string & str) {
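The hunk above only shows the declaration line of clip_projector_type_from_string. As a rough sketch of how the new "llama4" entry gets picked up, the reverse lookup presumably scans PROJECTOR_TYPE_NAMES, something like the following (an assumption, not a copy of the actual body):

// hedged sketch: map the string from GGUF metadata back to the enum,
// falling through to PROJECTOR_TYPE_UNKNOWN when the name is not listed
static projector_type clip_projector_type_from_string(const std::string & str) {
    for (const auto & kv : PROJECTOR_TYPE_NAMES) {
        if (kv.second == str) {
            return kv.first;
        }
    }
    return PROJECTOR_TYPE_UNKNOWN;
}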
@@ -240,6 +243,11 @@ struct clip_image_u8_batch {
 struct clip_image_f32_batch {
     std::vector<clip_image_f32_ptr> entries;

+    // for llava-uhd style models, we need to know the grid size
+    // note: entries.size() == grid_x * grid_y + 1 (one overview image)
+    int grid_x = 0;
+    int grid_y = 0;
+
     clip_image_f32_batch clone() const {
         clip_image_f32_batch new_batch;
         new_batch.entries.reserve(entries.size());
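The comment added above fixes the batch layout: grid_x * grid_y slice images plus one overview image. A minimal sketch of consuming a batch under that assumption; the helper name and the position of the overview entry are assumptions, since the hunk does not say whether the overview comes first or last:

// hedged sketch: walk the slices row by row, then the single overview image
static void walk_uhd_batch(const clip_image_f32_batch & batch) {
    GGML_ASSERT((int) batch.entries.size() == batch.grid_x * batch.grid_y + 1);
    for (int y = 0; y < batch.grid_y; y++) {
        for (int x = 0; x < batch.grid_x; x++) {
            const clip_image_f32_ptr & slice = batch.entries[y * batch.grid_x + x];
            // ... encode the slice at grid position (x, y) ...
            (void) slice;
        }
    }
    const clip_image_f32_ptr & overview = batch.entries.back(); // assumed to be stored last
    // ... encode the overview image ...
    (void) overview;
}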
@@ -358,6 +366,70 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
     }
 }

+//
+// debugging
+//
+
+static void print_tensor_shape(ggml_tensor * t) {
+    printf("%s.shape = [", t->name);
+    for (int i = 0; i < ggml_n_dims(t); ++i) {
+        printf("%" PRId64, t->ne[i]);
+        if (i < ggml_n_dims(t) - 1) {
+            printf(", ");
+        }
+    }
+    printf("]\n");
+}
+
+static void print_tensor_data(ggml_tensor * t, uint8_t * data, int64_t n) {
+    ggml_type type = t->type;
+    int64_t * ne = t->ne;
+    size_t * nb = t->nb;
+    for (int64_t i3 = 0; i3 < ne[3]; i3++) {
+        printf("%s.data: [\n", t->name);
+        for (int64_t i2 = 0; i2 < ne[2]; i2++) {
+            if (i2 == n && ne[2] > 2*n) {
+                printf("..., \n");
+                i2 = ne[2] - n;
+            }
+            printf("[\n");
+            for (int64_t i1 = 0; i1 < ne[1]; i1++) {
+                if (i1 == n && ne[1] > 2*n) {
+                    printf("..., \n");
+                    i1 = ne[1] - n;
+                }
+                printf("[");
+                for (int64_t i0 = 0; i0 < ne[0]; i0++) {
+                    if (i0 == n && ne[0] > 2*n) {
+                        printf("..., ");
+                        i0 = ne[0] - n;
+                    }
+                    size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
+                    float v;
+                    if (type == GGML_TYPE_F16) {
+                        v = ggml_fp16_to_fp32(*(ggml_fp16_t *) &data[i]);
+                    } else if (type == GGML_TYPE_F32) {
+                        v = *(float *) &data[i];
+                    } else if (type == GGML_TYPE_I32) {
+                        v = (float) *(int32_t *) &data[i];
+                    } else if (type == GGML_TYPE_I16) {
+                        v = (float) *(int16_t *) &data[i];
+                    } else if (type == GGML_TYPE_I8) {
+                        v = (float) *(int8_t *) &data[i];
+                    } else {
+                        GGML_ABORT("fatal error");
+                    }
+                    printf("%8.4f", v);
+                    if (i0 < ne[0] - 1) printf(", ");
+                }
+                printf("],\n");
+            }
+            printf("],\n");
+        }
+        printf("]\n");
+    }
+}
+
 //
 // API used internally with mtmd
 //
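A hedged example of how the two debug helpers above might be used on a tensor after graph execution: copy the data back to host memory with ggml_backend_tensor_get, then print the shape and the first/last n rows and columns of each dimension. The wrapper name below is hypothetical:

// hedged sketch: dump a computed tensor, eliding everything but the
// first/last 3 entries along each dimension
static void debug_dump_tensor(ggml_tensor * t) {
    std::vector<uint8_t> buf(ggml_nbytes(t));
    ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t));
    print_tensor_shape(t);
    print_tensor_data(t, buf.data(), 3); // n = 3 rows/cols printed at each end
}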