
Commit 61a7334 (1 parent: 9116690)

fixed mrope for multiple images in qwen2vl (+1 squashed commits)

Squashed commits:
[63e4d91c] fixed mrope for multiple images in qwen2vl (+1 squashed commits)
Squashed commits: [bb78db1e] wip fixing mrope

File tree

2 files changed (+6, −3 lines):
Makefile
gpttype_adapter.cpp


Makefile

Lines changed: 2 additions & 0 deletions
```diff
@@ -649,6 +649,8 @@ gguf-split: examples/gguf-split/gguf-split.cpp ggml.o ggml-cpu.o llama.o build-i
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 gemma3-cli: examples/llava/gemma3-cli.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+qwen2vl-cli: examples/llava/qwen2vl-cli.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 ggml/src/ggml-vulkan-shaders.cpp:
 ifdef VULKAN_BUILD
```
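The new rule mirrors the `gemma3-cli` target directly above it, so the Qwen2-VL CLI should presumably build like the other llava example binaries, e.g. via `make qwen2vl-cli` from the repository root.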

gpttype_adapter.cpp

Lines changed: 4 additions & 3 deletions
```diff
@@ -3146,6 +3146,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     int32_t nctx = kcpp_data->n_ctx;
 
     TokenizeString(kcpp_data->prompt, embd_inp, file_format, add_bos_token);
+    bool use_mrope = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
     TokenizeString("\n\n", llava_sep, file_format, false);
 
     if(llava_composite_image_signature=="")
```
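Hoisting `use_mrope` to just after prompt tokenization makes the flag available to every decode site in the function: the next hunk deletes the old local copy inside the batch-decode branch, and the final hunk reuses the flag when decoding the separator between images.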
```diff
@@ -3446,7 +3447,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     if(embd.size()!=1 || draft_ctx==nullptr || remaining_tokens<=speculative_chunk_amt || grammar!=nullptr || startedsampling==false) //for large batch, or if no draft model, PP/TG as usual
     {
         draft_used = false;
-        bool use_mrope = (file_format==FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
         kcpp_embd_batch batch = kcpp_embd_batch(embd, n_past, use_mrope, false);
         evalres = (llama_decode(llama_ctx_v4, batch.batch)==0);
         if(draft_ctx)
```
```diff
@@ -3871,7 +3871,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     {
         PrepareLlavaEmbds(nctx, llava_sep);
         llava_embds_built = true;
-        printf("\nSomehow vision embd was not prepared, rebuilting it...\n");
+        printf("\nSomehow vision embd was not prepared (maybe no fast forward), rebuilding it...\n");
     }
 
     //if partial batch, dispatch existing first
```
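Besides fixing the typo, the new message hints at the likely cause: "fast forward" here presumably refers to koboldcpp reusing a matching prefix of the previous context, during which the vision embeds are normally prepared; reaching this fallback suggests no prefix was reused.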
```diff
@@ -3902,7 +3902,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     if(i>0 && sepsize>0)
     {
         //add a separator between each image
-        auto evr = llama_decode(llama_ctx_v4, llama_batch_get_one(llava_sep.data(), sepsize));
+        kcpp_embd_batch batch = kcpp_embd_batch(llava_sep, n_past, use_mrope, false);
+        auto evr = llama_decode(llama_ctx_v4, batch.batch);
     if(evr!=0)
     {
         printf("\nError when appending llava separator: %d\n",evr);
```
