@@ -2796,11 +2796,12 @@ int GetThreadsToUse(bool blasmode)
 }
 
 //this function prepares the clip embds for llava. it's only needed when images change
-static void PrepareLlavaEmbds(const int nctx, const std::vector<int> & llava_sep)
+static void PrepareLlavaEmbds(const int nctx, const std::vector<int> & llava_sep, const std::vector<int> & llava_intro)
 {
     if(clp_ctx!=nullptr && clp_img_data!=nullptr)
     {
         int sepsize = llava_sep.size();
+        int introsize = llava_intro.size();
         last_llava_mem.clear();
 
         for(int i=0;i<llava_images.size();++i)
@@ -2829,6 +2830,10 @@ static void PrepareLlavaEmbds(const int nctx, const std::vector<int> & llava_sep
             if(llava_images[i].clp_image_tokens>0 && llava_images[i].clp_image_tokens < nctx)
             {
                 int tokcnt = (i==0?(llava_images[i].clp_image_tokens):(llava_images[i].clp_image_tokens+sepsize));
+                if(i==0)
+                {
+                    tokcnt += introsize;
+                }
                 for(int n=0;n<tokcnt;++n)
                 {
                     last_llava_mem.push_back(current_llava_identifier);
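Note on the hunk above: the identifier count reserved in last_llava_mem for image 0 now also covers the intro tokens, while images after the first still reserve only the separator. A minimal standalone sketch of that accounting, with made-up sizes (576 tokens per image, 4 intro tokens, and 2 separator tokens are placeholders, not values from this commit):

#include <cstdio>
#include <vector>

int main()
{
    int introsize = 4;  //hypothetical token count of "\nImages:\n"
    int sepsize = 2;    //hypothetical token count of "\n\n"
    std::vector<int> clp_image_tokens = {576, 576, 576}; //per-image embd counts

    int reserved = 0;
    for (size_t i = 0; i < clp_image_tokens.size(); ++i)
    {
        //same accounting as the hunk: a separator for every image after the first...
        int tokcnt = (i == 0 ? clp_image_tokens[i] : clp_image_tokens[i] + sepsize);
        //...and the new intro exactly once, before image 0
        if (i == 0)
        {
            tokcnt += introsize;
        }
        reserved += tokcnt;
    }
    printf("identifiers reserved: %d\n", reserved); //576*3 + 4 + 2*2 = 1736
    return 0;
}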
@@ -3144,21 +3149,23 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     std::vector<int> embd_inp;
     std::vector<int> embd_inp_mem; //for storing added memory
     std::vector<int> llava_sep; //to separate between different llava images
+    std::vector<int> llava_intro; //intro text to prepend before the first llava image
     bool llava_embds_built = false;
 
     int32_t nctx = kcpp_data->n_ctx;
 
     TokenizeString(kcpp_data->prompt, embd_inp, file_format, add_bos_token);
     bool use_mrope = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
     TokenizeString("\n\n", llava_sep, file_format, false);
+    TokenizeString("\nImages:\n", llava_intro, file_format, false);
 
     if(llava_composite_image_signature=="")
     {
         last_llava_mem.clear();
     }
     if(llava_images_changed)
     {
-        PrepareLlavaEmbds(nctx, llava_sep);
+        PrepareLlavaEmbds(nctx, llava_sep, llava_intro);
         llava_embds_built = true;
     }
 
@@ -3872,7 +3879,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         {
             if(!llava_embds_built) //this should never happen! however, handle it anyway
             {
-                PrepareLlavaEmbds(nctx, llava_sep);
+                PrepareLlavaEmbds(nctx, llava_sep, llava_intro);
                 llava_embds_built = true;
                 printf("\nSomehow vision embd was not prepared (maybe no fast forward), rebuilding it...\n");
             }
@@ -3888,6 +3895,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             int llavatokenscounted = 0;
             int llavatokensevaled = 0;
             int sepsize = llava_sep.size();
+            int introsize = llava_intro.size();
             while(input_consumed < embd_inp.size() && (embd_inp[input_consumed]==LLAVA_TOKEN_IDENTIFIER_A || embd_inp[input_consumed]==LLAVA_TOKEN_IDENTIFIER_B))
             {
                 if(!last_n_tokens.empty())
@@ -3902,7 +3910,23 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             for(int i=0;i<llava_images.size();++i)
             {
                 //note: no handling for draft_ctx as we don't support vision for it
-                if(i>0 && sepsize>0)
+                if(introsize>0 && i==0)
+                {
+                    //added at the start of everything
+                    kcpp_embd_batch batch = kcpp_embd_batch(llava_intro, n_past, use_mrope, false);
+                    auto evr = llama_decode(llama_ctx_v4, batch.batch);
+                    if(evr!=0)
+                    {
+                        printf("\nError when appending llava intro: %d\n",evr);
+                    }
+                    else
+                    {
+                        printf("\rProcessing LLaVa Intro (%d tokens)",introsize);
+                    }
+                    n_past += introsize;
+                    llavatokensevaled += introsize;
+                }
+                if(sepsize>0 && i>0)
                 {
                     //add a separator between each image
                     kcpp_embd_batch batch = kcpp_embd_batch(llava_sep, n_past, use_mrope, false);
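With the final hunk applied, evaluation proceeds as: the intro batch is decoded once before image 0, then a separator batch before each subsequent image, with n_past advanced by each batch's size. A self-contained toy model of that ordering (token ids, counts, and the 576-token image size are placeholders; the real code uses kcpp_embd_batch and llama_decode as shown above):

#include <cstdio>
#include <vector>

int main()
{
    std::vector<int> llava_intro = {11, 12, 13}; //stand-in for tokens of "\nImages:\n"
    std::vector<int> llava_sep = {21, 22};       //stand-in for tokens of "\n\n"
    const int image_count = 3;
    int n_past = 0;

    for (int i = 0; i < image_count; ++i)
    {
        if (!llava_intro.empty() && i == 0)
        {
            //decoded exactly once, before the first image
            printf("pos %d: intro batch (%zu tokens)\n", n_past, llava_intro.size());
            n_past += (int)llava_intro.size();
        }
        if (!llava_sep.empty() && i > 0)
        {
            //decoded before every image after the first
            printf("pos %d: separator batch (%zu tokens)\n", n_past, llava_sep.size());
            n_past += (int)llava_sep.size();
        }
        printf("pos %d: image %d embedding batch\n", n_past, i);
        n_past += 576; //hypothetical clp_image_tokens for this image
    }
    return 0;
}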