@@ -2777,7 +2777,10 @@ struct server_context {
                 } break;
             case SERVER_TASK_TYPE_SLOT_SAVE:
                 {
-                    if (!ensure_no_mtmd(task.id)) break;
+                    if (!ensure_no_mtmd(task.id)) {
+                        break;
+                    }
+
                     int id_slot = task.slot_action.slot_id;
                     server_slot * slot = get_slot_by_id(id_slot);
                     if (slot == nullptr) {
@@ -3269,7 +3272,7 @@ struct server_context {
 
                         // Process all prompt tokens through sampler system
                         for (size_t i = 0; i < slot.cache_tokens.size(); ++i) {
-                            llama_token id = slot.prompt_tokens[i];
+                            llama_token id = slot.cache_tokens[i];
                             if (id != LLAMA_TOKEN_NULL) {
                                 common_sampler_accept(slot.smpl, id, false);
                             }
@@ -3491,7 +3494,7 @@ struct server_context {
                 slot.n_draft_accepted += ids.size() - 1;
 
                 slot.cache_tokens.push_back(id);
-                slot.cache_tokens.insert(ids);
+                slot.cache_tokens.insert({ids.begin(), ids.end() - 1});
 
                 llama_kv_self_seq_rm(ctx, slot.id, slot.n_past, -1);
 
@@ -4105,8 +4108,9 @@ int main(int argc, char ** argv) {
             std::vector<server_tokens> inputs;
             if (oaicompat && !prompt.is_string()) {
                 throw std::runtime_error("prompt must be a string");
+            }
 
-            } else if (oaicompat && has_mtmd) {
+            if (oaicompat && has_mtmd) {
                 // multimodal
                 std::string prompt_str = prompt.get<std::string>();
                 mtmd_input_text inp_txt = {
@@ -4124,9 +4128,9 @@ int main(int argc, char ** argv) {
                 if (tokenized != 0) {
                     throw std::runtime_error("Failed to tokenize prompt");
                 }
+
                 server_tokens tmp(chunks, true);
                 inputs.push_back(std::move(tmp));
-
             } else {
                 // non-multimodal version
                 auto tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);