Skip to content

Commit 92bb84f

Browse files
authored
mtmd: allow QwenVL to process larger image by default (ggml-org#17020)
1 parent 13b339b commit 92bb84f

File tree

1 file changed

+3
-9
lines changed

1 file changed

+3
-9
lines changed

tools/mtmd/clip.cpp

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2791,14 +2791,8 @@ struct clip_model_loader {
2791 2791
get_u32(KEY_SPATIAL_MERGE_SIZE, hparams.n_merge, false);
2792 2792
get_u32(KEY_WIN_ATTN_PATTERN, hparams.n_wa_pattern, model.proj_type == PROJECTOR_TYPE_QWEN25VL); // only 2.5 requires it
2793 2793
// ref: https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct/blob/main/preprocessor_config.json
2794-
// the actual max limit is 12845056/14/14/2/2/4 = 4096 tokens
2795-
// but we set a lower value to avoid OOM
2796-
// TODO: make it configurable by user
2797-
// TODO (2): bbox coordinates become inaccurate with small number of tokens,
2798-
// therefore we need to increase the min_tokens
2799-
// see: https://github.com/ggml-org/llama.cpp/issues/16842#issuecomment-3475144858
2800-
hparams.set_limit_image_tokens(8, 2048);
2801-
hparams.set_warmup_n_tokens(256); // avoid OOM on warmup
2794+
hparams.set_limit_image_tokens(8, 4096);
2795+
hparams.set_warmup_n_tokens(46*46); // avoid OOM on warmup
2802 2796
const int warn_min_pixels = 1024 * hparams.n_merge * hparams.n_merge * hparams.patch_size * hparams.patch_size;
2803 2797
if (hparams.image_min_pixels < warn_min_pixels) {
2804 2798
LOG_WRN("%s: Qwen-VL models require at minimum 1024 image tokens to function correctly on grounding tasks\n", __func__);
@@ -4814,7 +4808,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
4814 4808
case PROJECTOR_TYPE_QWEN2VL:
4815 4809
case PROJECTOR_TYPE_QWEN3VL:
4816 4810
{
4817-
const int merge_ratio = 2;
4811+
const int merge_ratio = hparams.n_merge;
4818 4812
const int pw = image_size_width / patch_size;
4819 4813
const int ph = image_size_height / patch_size;
4820 4814
std::vector<int> positions(n_pos * 4);

0 commit comments

Comments
 (0)