@@ -80,7 +80,7 @@ struct mtmd_cli_context {
     common_chat_templates_ptr tmpls;
     std::vector<common_chat_msg> chat_history;
     bool use_jinja = false;
-    std::string system_prompt;
+    // TODO: support for --system-prompt with /clear command

     // support for legacy templates (models not having EOT token)
     llama_tokens antiprompt_tokens;
@@ -111,8 +111,7 @@ struct mtmd_cli_context {

         tmpls = common_chat_templates_init(model, params.chat_template);
         use_jinja = params.use_jinja;
-        system_prompt = params.system_prompt;
-        reset_chat_history();
+        chat_history.clear();
         LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(tmpls.get(), params.use_jinja, params.default_template_kwargs).c_str());

         init_vision_context(params);
@@ -130,16 +129,6 @@ struct mtmd_cli_context {
         common_sampler_free(smpl);
     }

-    void reset_chat_history() {
-        chat_history.clear();
-        if (!system_prompt.empty()) {
-            common_chat_msg sys_msg;
-            sys_msg.role    = "system";
-            sys_msg.content = system_prompt;
-            chat_history.push_back(std::move(sys_msg));
-        }
-    }
-
     void init_vision_context(common_params & params) {
         const char * clip_path = params.mmproj.path.c_str();
         mtmd_context_params mparams = mtmd_context_params_default();
@@ -228,7 +217,8 @@ static std::string chat_add_and_format(mtmd_cli_context & ctx, common_chat_msg &
     return formatted;
 }

-static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg, bool add_bos = false) {
+static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg) {
+    bool add_bos = ctx.chat_history.empty();
     auto formatted_chat = chat_add_and_format(ctx, msg);
     LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.c_str());

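The hunk above is the core of the change: instead of threading an add_bos flag through every call site, eval_message() now derives it from the conversation state. BOS belongs exactly to the first message of a conversation, and an empty chat_history identifies that case, since the check runs before chat_add_and_format() appends the new message. A minimal sketch of the invariant, assuming an illustrative `history` stand-in rather than the real mtmd_cli_context:

    #include <cassert>
    #include <string>
    #include <vector>

    // Illustrative stand-in for the history slice of mtmd_cli_context.
    struct history {
        std::vector<std::string> messages;
    };

    // BOS is wanted exactly when the incoming message is the first of the
    // conversation, i.e. while the history is still empty, before the new
    // message gets appended.
    static bool wants_bos(const history & h) {
        return h.messages.empty();
    }

    int main() {
        history h;
        assert(wants_bos(h));        // first message: prepend BOS
        h.messages.push_back("hi");
        assert(!wants_bos(h));       // follow-up turns: no BOS
        h.messages.clear();          // what /clear now does
        assert(wants_bos(h));        // next message counts as first again
    }

This also keeps /clear consistent by construction: clearing the history makes the next message count as a first message again, as exercised at the end of the sketch.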
@@ -331,7 +321,7 @@ int main(int argc, char ** argv) {
                 return 1; // error is already printed by libmtmd
             }
         }
-        if (eval_message(ctx, msg, true)) {
+        if (eval_message(ctx, msg)) {
             return 1;
         }
         if (!g_is_interrupted && generate_response(ctx, n_predict)) {
@@ -350,7 +340,6 @@ int main(int argc, char ** argv) {
         LOG("\n/quit or /exit        exit the program");
         LOG("\n");

-        bool is_first_msg = true;
         std::string content;

         while (!g_is_interrupted) {
@@ -370,7 +359,7 @@ int main(int argc, char ** argv) {
             }
             if (line == "/clear") {
                 ctx.n_past = 0;
-                ctx.reset_chat_history();
+                ctx.chat_history.clear();
                 llama_memory_clear(llama_get_memory(ctx.lctx), true);
                 LOG("Chat history cleared\n\n");
                 continue;
@@ -396,7 +385,7 @@ int main(int argc, char ** argv) {
             common_chat_msg msg;
             msg.role    = "user";
             msg.content = content;
-            int ret = eval_message(ctx, msg, is_first_msg);
+            int ret = eval_message(ctx, msg);
             if (ret) {
                 return 1;
             }
@@ -405,7 +394,6 @@ int main(int argc, char ** argv) {
                 return 1;
             }
             content.clear();
-            is_first_msg = false;
         }
     }
     if (g_is_interrupted) LOG("\nInterrupted by user\n");
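Why system-prompt support comes out entirely rather than staying seeded in the history: with a system message pushed into chat_history up front, chat_history.empty() would already be false on the first user turn, so the new add_bos check would skip BOS; the TODO in the first hunk presumably tracks restoring the feature. One hypothetical shape for that later work, none of which is in this commit (the names and the n_past-based BOS check below are invented for illustration): re-seed the system prompt on clear and tie BOS to "nothing evaluated yet" instead of "history empty".

    // Hypothetical sketch, not part of this commit.
    #include <cassert>
    #include <string>
    #include <vector>

    struct chat_state {
        std::vector<std::string> history;
        std::string system_prompt;   // would come from --system-prompt
        int n_past = 0;              // tokens already evaluated

        // Tie BOS to "nothing evaluated yet" rather than history.empty(),
        // so a pre-seeded system message does not suppress BOS.
        bool wants_bos() const { return n_past == 0; }

        void clear() {
            history.clear();
            n_past = 0;
            if (!system_prompt.empty()) {
                history.push_back(system_prompt); // what reset_chat_history() did
            }
        }
    };

    int main() {
        chat_state st;
        st.system_prompt = "You are a helpful assistant.";
        st.clear();
        assert(st.wants_bos());       // system message seeded, BOS still wanted
        st.history.push_back("hi");
        st.n_past = 42;               // pretend the first turn was evaluated
        assert(!st.wants_bos());
    }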