Skip to content

Commit 2f0c2db

Browse files
authored
mtmd: improve struct initialization (ggml-org#16981)
1 parent fd2f84f commit 2f0c2db

File tree

2 files changed: +19 additions, -17 deletions

tools/mtmd/clip.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2761,6 +2761,7 @@ struct clip_model_loader {
27612761
{
27622762
// ref: https://huggingface.co/mistral-community/pixtral-12b/blob/main/preprocessor_config.json
27632763
// TODO: verify the image_min_tokens
2764+
hparams.n_merge = 1; // the original pixtral does not use patch merging
27642765
hparams.rope_theta = 10000.0f;
27652766
get_u32(KEY_SPATIAL_MERGE_SIZE, hparams.n_merge, false);
27662767
hparams.set_limit_image_tokens(8, 1024);

tools/mtmd/mtmd.cpp

Lines changed: 18 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -101,16 +101,17 @@ static clip_flash_attn_type mtmd_get_clip_flash_attn_type(enum llama_flash_attn_
101101
}
102102

103103
mtmd_context_params mtmd_context_params_default() {
104-
mtmd_context_params params;
105-
params.use_gpu = true;
106-
params.print_timings = true;
107-
params.n_threads = 4;
108-
params.verbosity = GGML_LOG_LEVEL_INFO;
109-
params.image_marker = MTMD_DEFAULT_IMAGE_MARKER;
110-
params.media_marker = mtmd_default_marker();
111-
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
112-
params.image_min_tokens = -1;
113-
params.image_max_tokens = -1;
104+
mtmd_context_params params {
105+
/* use_gpu */ true,
106+
/* print_timings */ true,
107+
/* n_threads */ 4,
108+
/* verbosity */ GGML_LOG_LEVEL_INFO,
109+
/* image_marker */ MTMD_DEFAULT_IMAGE_MARKER,
110+
/* media_marker */ mtmd_default_marker(),
111+
/* flash_attn_type */ LLAMA_FLASH_ATTN_TYPE_AUTO,
112+
/* image_min_tokens */ -1,
113+
/* image_max_tokens */ -1,
114+
};
114115
return params;
115116
}
116117

@@ -172,13 +173,13 @@ struct mtmd_context {
172173
throw std::runtime_error("media_marker must not be empty");
173174
}
174175

175-
clip_context_params ctx_clip_params;
176-
ctx_clip_params.use_gpu = ctx_params.use_gpu;
177-
ctx_clip_params.verbosity = ctx_params.verbosity;
178-
ctx_clip_params.flash_attn_type = mtmd_get_clip_flash_attn_type(ctx_params.flash_attn_type);
179-
// custom image token limits
180-
ctx_clip_params.image_min_tokens = ctx_params.image_min_tokens;
181-
ctx_clip_params.image_max_tokens = ctx_params.image_max_tokens;
176+
clip_context_params ctx_clip_params {
177+
/* use_gpu */ ctx_params.use_gpu,
178+
/* verbosity */ ctx_params.verbosity,
179+
/* flash_attn_type */ CLIP_FLASH_ATTN_TYPE_AUTO,
180+
/* image_min_tokens */ ctx_params.image_min_tokens,
181+
/* image_max_tokens */ ctx_params.image_max_tokens,
182+
};
182183

183184
auto res = clip_init(mmproj_fname, ctx_clip_params);
184185
ctx_v = res.ctx_v;

0 commit comments

Comments (0)