@@ -52,7 +52,8 @@ class ChatSession {
     llama::Instance& m_instance;
     IoEndpoint& m_io;
 
-    std::string m_userPrefix;
+    std::string m_roleUser;
+    std::string m_roleAssistant;
     std::unique_ptr<llama::ChatFormat> m_chatFormat;
     std::vector<llama::ChatMsg> m_chatMessages;
     size_t m_submittedMessages = 0;
@@ -82,9 +83,8 @@ class ChatSession {
         auto promptTokens = instance.model().vocab().tokenize(params.setup.value(), true, true);
         m_session.setInitialPrompt(promptTokens);
 
-        m_userPrefix = "\n";
-        m_userPrefix += params.roleUser;
-        m_userPrefix += ":";
+        m_roleUser = params.roleUser;
+        m_roleAssistant = params.roleAssistant;
     }
 
     ~ChatSession() {
@@ -132,8 +132,10 @@ class ChatSession {
         }
 
         ac::llama::AntipromptManager antiprompt;
-        antiprompt.addAntiprompt(m_userPrefix);
+        auto userPrefix = "\n" + m_roleUser + ":";
+        antiprompt.addAntiprompt(userPrefix);
 
+        std::string fullResponse;
         Schema::OpGetChatResponse::Return ret;
         auto& result = ret.response.materialize();
 
@@ -146,6 +148,7 @@
 
             auto tokenStr = m_vocab.tokenToString(t);
             result += tokenStr;
+            fullResponse += tokenStr;
 
             auto matchedAntiPrompt = antiprompt.feedGeneratedText(tokenStr);
             if (!matchedAntiPrompt.empty()) {
@@ -167,6 +170,7 @@
         // with a leading space, so instead of burdening them with "unorthodox" tokens, we'll clear it here
         if (!result.empty() && result[0] == ' ') {
             result.erase(0, 1);
+            fullResponse.erase(0, 1);
         }
 
         if (isStreaming) {
@@ -180,6 +184,8 @@
                 .response = std::move(result)
             }));
         }
+
+        m_chatMessages.push_back({.role = m_roleAssistant, .text = std::move(fullResponse)});
     }
 };
 
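Taken together, the hunks replace the session's pre-baked user prefix with stored role names: the antiprompt prefix is rebuilt per response, a parallel `fullResponse` buffer mirrors every edit made to the streamed `result`, and the finished reply is appended to `m_chatMessages` under the assistant role. The standalone program below sketches that flow in isolation. It is illustrative only: `ToyAntipromptManager`, `ToyChatMsg`, the canned token stream, and the trim-on-match step (which happens outside the shown hunks) are assumptions for this sketch, not the real `ac::llama` API.

```cpp
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-in for ac::llama::AntipromptManager: accumulates the
// generated text and reports which registered antiprompt, if any, appears.
class ToyAntipromptManager {
    std::vector<std::string> m_antiprompts;
    std::string m_text;
public:
    void addAntiprompt(std::string ap) { m_antiprompts.push_back(std::move(ap)); }

    std::string feedGeneratedText(const std::string& chunk) {
        m_text += chunk;
        for (const auto& ap : m_antiprompts) {
            if (m_text.find(ap) != std::string::npos) return ap;
        }
        return {};
    }
};

// Assumed shape of llama::ChatMsg, matching the designated initializer in the diff.
struct ToyChatMsg { std::string role; std::string text; };

int main() {
    std::string roleUser = "User";
    std::string roleAssistant = "Assistant";
    std::vector<ToyChatMsg> chatMessages;

    // Same prefix the session registers: generation stops once the model
    // starts writing the user's next turn itself.
    ToyAntipromptManager antiprompt;
    auto userPrefix = "\n" + roleUser + ":";
    antiprompt.addAntiprompt(userPrefix);

    // A canned "token" stream in place of real sampling.
    std::vector<std::string> tokens = {" Sure", ",", " here", " it", " is", ".", "\n", "User", ":"};

    std::string result;        // what would be streamed/returned to the client
    std::string fullResponse;  // what gets recorded in the chat history
    for (const auto& tokenStr : tokens) {
        result += tokenStr;
        fullResponse += tokenStr;
        auto matched = antiprompt.feedGeneratedText(tokenStr);
        if (!matched.empty()) {
            // Assumed handling: trim the echoed user prefix off both
            // accumulators before stopping (this step is outside the hunks).
            if (auto pos = result.rfind(matched); pos != std::string::npos) {
                result.erase(pos);
                fullResponse.erase(pos);
            }
            break;
        }
    }

    // Same cosmetic fix as the diff: many models emit a leading space.
    if (!result.empty() && result[0] == ' ') {
        result.erase(0, 1);
        fullResponse.erase(0, 1);
    }

    chatMessages.push_back({.role = roleAssistant, .text = std::move(fullResponse)});
    std::cout << chatMessages.back().role << ": " << chatMessages.back().text << '\n';
    // prints: Assistant: Sure, here it is.
}
```

The separate buffer appears to be the point of the change: `result` is moved out to the streaming side, so the history entry needs its own copy that received the same antiprompt and leading-space trims as the text the client saw.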