@@ -3146,6 +3146,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
31463146 int32_t nctx = kcpp_data->n_ctx ;
31473147
31483148 TokenizeString (kcpp_data->prompt , embd_inp, file_format, add_bos_token);
3149+ bool use_mrope = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
31493150 TokenizeString (" \n\n " , llava_sep, file_format, false );
31503151
31513152 if (llava_composite_image_signature==" " )
@@ -3446,7 +3447,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
34463447 if (embd.size ()!=1 || draft_ctx==nullptr || remaining_tokens<=speculative_chunk_amt || grammar!=nullptr || startedsampling==false ) // for large batch, or if no draft model, PP/TG as usual
34473448 {
34483449 draft_used = false ;
3449- bool use_mrope = (file_format==FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
34503450 kcpp_embd_batch batch = kcpp_embd_batch (embd, n_past, use_mrope, false );
34513451 evalres = (llama_decode (llama_ctx_v4, batch.batch )==0 );
34523452 if (draft_ctx)
@@ -3871,7 +3871,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
38713871 {
38723872 PrepareLlavaEmbds (nctx, llava_sep);
38733873 llava_embds_built = true ;
3874- printf (" \n Somehow vision embd was not prepared, rebuilting it...\n " );
3874+ printf (" \n Somehow vision embd was not prepared (maybe no fast forward), rebuilding it...\n " );
38753875 }
38763876
38773877 // if partial batch, dispatch existing first
@@ -3902,7 +3902,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
39023902 if (i>0 && sepsize>0 )
39033903 {
39043904 // add a separator between each image
3905- auto evr = llama_decode (llama_ctx_v4, llama_batch_get_one (llava_sep.data (), sepsize));
3905+ kcpp_embd_batch batch = kcpp_embd_batch (llava_sep, n_past, use_mrope, false );
3906+ auto evr = llama_decode (llama_ctx_v4, batch.batch );
39063907 if (evr!=0 )
39073908 {
39083909 printf (" \n Error when appending llava separator: %d\n " ,evr);
0 commit comments