|
5 | 5 | #include <string> |
6 | 6 | #include <vector> |
| 7 | +#include <memory>  // needed for std::unique_ptr / std::make_unique below |
7 | 7 |
|
| 8 | +// Add a message to `messages`; its text is copied into `owned_content`, which owns it (llama_chat_message only stores raw pointers) |
| 9 | +static void add_message(const char * role, const std::string &text, |
| 10 | + std::vector<llama_chat_message> &messages, |
| 11 | + std::vector<std::unique_ptr<char[]>> &owned_content) { |
| 12 | + auto content = std::make_unique<char[]>(text.size() + 1); |
| 13 | + std::strcpy(content.get(), text.c_str()); |
| 14 | + messages.push_back({role, content.get()}); // role points to a string literal; the content copy is owned by owned_content |
| 15 | + owned_content.push_back(std::move(content)); |
| 16 | +} |
| 17 | + |
| 18 | +// Apply the chat template to `messages`, growing `formatted` if it is too small; returns the formatted length, or a negative value on error |
| 19 | +static int apply_chat_template(llama_model *model, |
| 20 | + const std::vector<llama_chat_message> &messages, |
| 21 | + std::vector<char> &formatted, bool append) { |
| 22 | + int result = llama_chat_apply_template(model, nullptr, messages.data(), |
| 23 | + messages.size(), append, |
| 24 | + formatted.data(), formatted.size()); |
| 25 | + if (result > static_cast<int>(formatted.size())) { |
| 26 | + formatted.resize(result); |
| 27 | + result = llama_chat_apply_template(model, nullptr, messages.data(), |
| 28 | + messages.size(), append, |
| 29 | + formatted.data(), formatted.size()); |
| 30 | + } |
| 31 | + |
| 32 | + return result; |
| 33 | +} |
| 34 | + |
8 | 35 | static void print_usage(int, char ** argv) { |
9 | 36 | printf("\nexample usage:\n"); |
10 | 37 | printf("\n %s -m model.gguf [-c context_size] [-ngl n_gpu_layers]\n", argv[0]); |
@@ -66,6 +93,7 @@ int main(int argc, char ** argv) { |
66 | 93 | llama_model_params model_params = llama_model_default_params(); |
67 | 94 | model_params.n_gpu_layers = ngl; |
68 | 95 |
|
| 96 | + // load the model; this prints the model metadata to stderr while loading |
69 | 97 | llama_model * model = llama_load_model_from_file(model_path.c_str(), model_params); |
70 | 98 | if (!model) { |
71 | 99 | fprintf(stderr , "%s: error: unable to load model\n" , __func__); |
@@ -144,51 +172,43 @@ int main(int argc, char ** argv) { |
144 | 172 | }; |
145 | 173 |
|
146 | 174 | std::vector<llama_chat_message> messages; |
| 175 | + std::vector<std::unique_ptr<char[]>> owned_content; |
147 | 176 | std::vector<char> formatted(llama_n_ctx(ctx)); |
148 | 177 | int prev_len = 0; |
149 | 178 | while (true) { |
150 | | - // get user input |
151 | | - printf("\033[32m> \033[0m"); |
152 | | - std::string user; |
153 | | - std::getline(std::cin, user); |
154 | | - |
155 | | - if (user.empty()) { |
156 | | - break; |
157 | | - } |
158 | | - |
159 | | - // add the user input to the message list and format it |
160 | | - messages.push_back({"user", strdup(user.c_str())}); |
161 | | - int new_len = llama_chat_apply_template(model, nullptr, messages.data(), messages.size(), true, formatted.data(), formatted.size()); |
162 | | - if (new_len > (int)formatted.size()) { |
163 | | - formatted.resize(new_len); |
164 | | - new_len = llama_chat_apply_template(model, nullptr, messages.data(), messages.size(), true, formatted.data(), formatted.size()); |
165 | | - } |
166 | | - if (new_len < 0) { |
167 | | - fprintf(stderr, "failed to apply the chat template\n"); |
168 | | - return 1; |
169 | | - } |
| 179 | + // get user input |
| 180 | + printf("\033[32m> \033[0m"); |
| 181 | + std::string user; |
| 182 | + std::getline(std::cin, user); |
| 183 | + if (user.empty()) { |
| 184 | + break; |
| 185 | + } |
| 186 | + |
| 187 | + // Add user input to messages |
| 188 | + add_message("user", user, messages, owned_content); |
| 189 | + int new_len = apply_chat_template(model, messages, formatted, true); |
| 190 | + if (new_len < 0) { |
| 191 | + fprintf(stderr, "failed to apply the chat template\n"); |
| 192 | + return 1; |
| 193 | + } |
170 | 194 |
|
171 | | - // remove previous messages to obtain the prompt to generate the response |
172 | | - std::string prompt(formatted.begin() + prev_len, formatted.begin() + new_len); |
| 195 | + // remove previous messages to obtain the prompt to generate the response |
| 196 | + std::string prompt(formatted.begin() + prev_len, |
| 197 | + formatted.begin() + new_len); |
173 | 198 |
|
174 | | - // generate a response |
175 | | - printf("\033[33m"); |
176 | | - std::string response = generate(prompt); |
177 | | - printf("\n\033[0m"); |
| 199 | + // generate a response |
| 200 | + printf("\033[33m"); |
| 201 | + std::string response = generate(prompt); |
| 202 | + printf("\n\033[0m"); |
178 | 203 |
|
179 | | - // add the response to the messages |
180 | | - messages.push_back({"assistant", strdup(response.c_str())}); |
181 | | - prev_len = llama_chat_apply_template(model, nullptr, messages.data(), messages.size(), false, nullptr, 0); |
182 | | - if (prev_len < 0) { |
183 | | - fprintf(stderr, "failed to apply the chat template\n"); |
184 | | - return 1; |
185 | | - } |
| 204 | + // Add the response to the messages so it is part of the next turn's context |
| 205 | + add_message("assistant", response, messages, owned_content); |
| 206 | + prev_len = apply_chat_template(model, messages, formatted, false); |
| 206 | + if (prev_len < 0) { |
| 207 | + fprintf(stderr, "failed to apply the chat template\n"); |
| 208 | + return 1; |
| 209 | + } |
186 | 210 | } |
187 | 211 |
|
188 | | - // free resources |
189 | | - for (auto & msg : messages) { |
190 | | - free(const_cast<char *>(msg.content)); |
191 | | - } |
192 | 212 | llama_sampler_free(smpl); |
193 | 213 | llama_free(ctx); |
194 | 214 | llama_free_model(model); |
|
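For context, the ownership pattern introduced above can be shown in isolation. The sketch below is hypothetical: it uses a stand-in `chat_message` struct instead of `llama_chat_message` so it builds without llama.h. The point is that the message structs hold only raw, non-owning pointers, while a parallel `std::vector<std::unique_ptr<char[]>>` owns the copied strings, which is what allows the old `strdup()`/`free()` cleanup loop to be dropped.

```cpp
#include <cstdio>
#include <cstring>
#include <memory>
#include <string>
#include <vector>

// Stand-in for llama_chat_message: it only stores raw, non-owning pointers.
struct chat_message {
    const char * role;
    const char * content;
};

// Copy `text` into heap storage tracked by `owned_content`, then push a
// message whose content pointer refers to that storage.
static void add_message(const char * role, const std::string & text,
                        std::vector<chat_message> & messages,
                        std::vector<std::unique_ptr<char[]>> & owned_content) {
    auto content = std::make_unique<char[]>(text.size() + 1);
    std::strcpy(content.get(), text.c_str());
    messages.push_back({role, content.get()});
    owned_content.push_back(std::move(content));
}

int main() {
    std::vector<chat_message> messages;
    std::vector<std::unique_ptr<char[]>> owned_content;

    add_message("user", "hello", messages, owned_content);
    add_message("assistant", "hi there", messages, owned_content);

    for (const auto & m : messages) {
        printf("%s: %s\n", m.role, m.content);
    }
    // No manual free loop needed: owned_content releases every copy here.
    return 0;
}
```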