1 file changed: +5 -2 lines changed
lines changed Original file line number Diff line number Diff line change @@ -2753,8 +2753,11 @@ struct clip_model_loader {
27532753 // the actual max limit is 12845056/14/14/2/2/4 = 4096 tokens
27542754 // but we set a lower value to avoid OOM
27552755 // TODO: make it configurable by user
2756- hparams.set_limit_image_tokens (1 , 2048 );
2757- hparams.set_warmup_n_tokens (256 ); // avoid OOM on warmup
2756+ // TODO (2): bbox coordinates become inaccurate with a small number of tokens,
2757+ // therefore min_tokens needs to be raised
2758+ // see: https://github.com/ggml-org/llama.cpp/issues/16842#issuecomment-3475144858
2759+ hparams.set_limit_image_tokens (256 , 2048 );
2760+ hparams.set_warmup_n_tokens (1024 ); // avoid OOM on warmup
27582761 } break ;
27592762 case PROJECTOR_TYPE_LLAMA4:
27602763 {
You can’t perform that action at this time.
0 commit comments