@@ -82,9 +82,11 @@ struct mtmd_cli_context {
8282
8383 mtmd::bitmaps bitmaps;
8484
85- // note: we know that gemma3 template is "linear", meaning each turn is completely separated to another
86- // so here we don't need to keep track of chat history
85+ // chat template
8786 common_chat_templates_ptr tmpls;
87+ std::vector<common_chat_msg> chat_history;
88+ bool use_jinja = false ;
89+ // TODO: support for --system-prompt with /clear command
8890
8991 // support for legacy templates (models not having EOT token)
9092 llama_tokens antiprompt_tokens;
@@ -114,6 +116,8 @@ struct mtmd_cli_context {
114116 }
115117
116118 tmpls = common_chat_templates_init (model, params.chat_template );
119+ use_jinja = params.use_jinja ;
120+ chat_history.clear ();
117121 LOG_INF (" %s: chat template example:\n %s\n " , __func__, common_chat_format_example (tmpls.get (), params.use_jinja , params.default_template_kwargs ).c_str ());
118122
119123 init_vision_context (params);
@@ -324,19 +328,33 @@ static int generate_response(mtmd_cli_context & ctx, int n_predict) {
324328 return 1 ;
325329 }
326330 }
331+
332+ std::string generated_text = common_detokenize (ctx.lctx , generated_tokens);
333+ common_chat_msg msg;
334+ msg.role = " assistant" ;
335+ msg.content = generated_text;
336+ ctx.chat_history .push_back (std::move (msg));
337+
327338 return 0 ;
328339}
329340
330- static int eval_message (mtmd_cli_context & ctx, common_chat_msg & msg, bool add_bos = false ) {
331- common_chat_templates_inputs tmpl_inputs;
332- tmpl_inputs.messages = {msg};
333- tmpl_inputs.add_generation_prompt = true ;
334- tmpl_inputs.use_jinja = false ; // jinja is buggy here
335- auto formatted_chat = common_chat_templates_apply (ctx.tmpls .get (), tmpl_inputs);
336- LOG_DBG (" formatted_chat.prompt: %s\n " , formatted_chat.prompt .c_str ());
// Formats `new_msg` against the chat history stored in `ctx`, then records the
// message in that history.
//
// ctx     - CLI context; reads ctx.tmpls, ctx.chat_history and ctx.use_jinja,
//           and appends to ctx.chat_history.
// new_msg - message to format; its role and content are logged at debug level.
//
// Returns the string produced by common_chat_format_single().
// NOTE(review): presumably that string holds only the newly added portion of
// the templated conversation, not the whole history - confirm against the
// helper's declaration.
341+ static std::string chat_add_and_format (mtmd_cli_context & ctx, common_chat_msg & new_msg) {
342+ LOG_DBG (" chat_add_and_format: new_msg.role='%s', new_msg.content='%s'\n " ,
343+ new_msg.role .c_str (), new_msg.content .c_str ());
// The 4th argument is true only when the new message's role compares equal to
// the user-role literal.
// NOTE(review): presumably this asks the template to append the assistant
// generation prompt - confirm.
344+ auto formatted = common_chat_format_single (ctx.tmpls .get (), ctx.chat_history ,
345+ new_msg, new_msg.role == " user" ,
346+ ctx.use_jinja );
// The history is extended only after formatting, so the call above sees the
// history without `new_msg` in it.
347+ ctx.chat_history .push_back (new_msg);
348+ return formatted;
349+ }
350+
351+ static int eval_message (mtmd_cli_context & ctx, common_chat_msg & msg) {
352+ bool add_bos = ctx.chat_history .empty ();
353+ auto formatted_chat = chat_add_and_format (ctx, msg);
354+ LOG_DBG (" formatted_chat.prompt: %s\n " , formatted_chat.c_str ());
337355
338356 mtmd_input_text text;
339- text.text = formatted_chat.prompt . c_str ();
357+ text.text = formatted_chat.c_str ();
340358 text.add_special = add_bos;
341359 text.parse_special = true ;
342360
@@ -446,7 +464,7 @@ int main(int argc, char ** argv) {
446464 return 1 ; // error is already printed by libmtmd
447465 }
448466 }
449- if (eval_message (ctx, msg, true )) {
467+ if (eval_message (ctx, msg)) {
450468 return 1 ;
451469 }
452470 if (!g_is_interrupted && generate_response (ctx, n_predict)) {
@@ -465,7 +483,6 @@ int main(int argc, char ** argv) {
465483 LOG (" \n /quit or /exit exit the program" );
466484 LOG (" \n " );
467485
468- bool is_first_msg = true ;
469486 std::string content;
470487
471488 while (!g_is_interrupted) {
@@ -485,7 +502,8 @@ int main(int argc, char ** argv) {
485502 }
486503 if (line == " /clear" ) {
487504 ctx.n_past = 0 ;
488- llama_memory_seq_rm (llama_get_memory (ctx.lctx ), 0 , 1 , -1 ); // keep BOS
505+ ctx.chat_history .clear ();
506+ llama_memory_clear (llama_get_memory (ctx.lctx ), true );
489507 LOG (" Chat history cleared\n\n " );
490508 continue ;
491509 }
@@ -510,7 +528,7 @@ int main(int argc, char ** argv) {
510528 common_chat_msg msg;
511529 msg.role = " user" ;
512530 msg.content = content;
513- int ret = eval_message (ctx, msg, is_first_msg );
531+ int ret = eval_message (ctx, msg);
514532 if (ret) {
515533 return 1 ;
516534 }
@@ -519,7 +537,6 @@ int main(int argc, char ** argv) {
519537 return 1 ;
520538 }
521539 content.clear ();
522- is_first_msg = false ;
523540 }
524541 }
525542 if (g_is_interrupted) LOG (" \n Interrupted by user\n " );
0 commit comments