Skip to content

Commit 6ae1438

Browse files
authored
Merge branch 'ikawrakow:main' into main
2 parents 5f937c9 + 95780cd commit 6ae1438

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

64 files changed

+115458
-8965
lines changed

common/CMakeLists.txt

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,15 +65,12 @@ add_library(${TARGET} STATIC
6565
console.cpp
6666
grammar-parser.h
6767
grammar-parser.cpp
68-
json.hpp
6968
json-partial.h
7069
json-partial.cpp
7170
llguidance.cpp
7271
json-schema-to-grammar.cpp
7372
train.h
7473
train.cpp
75-
minja/chat-template.hpp
76-
minja/minja.hpp
7774
ngram-cache.h
7875
ngram-cache.cpp
7976
speculative.cpp
@@ -123,6 +120,6 @@ if (LLAMA_LLGUIDANCE)
123120
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance)
124121
endif ()
125122

126-
target_include_directories(${TARGET} PUBLIC .)
127-
target_compile_features (${TARGET} PUBLIC cxx_std_11)
123+
target_include_directories(${TARGET} PUBLIC . ../vendor)
124+
target_compile_features (${TARGET} PUBLIC cxx_std_17)
128125
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)

common/chat-parser.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
#include "chat.h"
44
#include "json-partial.h"
5-
#include "json.hpp"
65
#include "regex-partial.h"
76

87
#include <optional>

common/chat.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,11 +489,12 @@ std::string common_chat_format_single(
489489
return ss.str();
490490
}
491491

492-
std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja) {
492+
std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja, const std::map<std::string, std::string> & chat_template_kwargs) {
493493
common_chat_templates_inputs inputs;
494494
inputs.use_jinja = use_jinja;
495495
inputs.add_bos = tmpls->add_bos;
496496
inputs.add_eos = tmpls->add_eos;
497+
inputs.chat_template_kwargs = chat_template_kwargs;
497498
auto add_simple_msg = [&](auto role, auto content) {
498499
common_chat_msg msg;
499500
msg.role = role;

common/chat.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,8 @@ std::string common_chat_format_single(
188188
// Returns an example of formatted chat
189189
std::string common_chat_format_example(
190190
const struct common_chat_templates * tmpls,
191-
bool use_jinja);
191+
bool use_jinja,
192+
const std::map<std::string, std::string> & chat_template_kwargs);
192193

193194
const char* common_chat_format_name(common_chat_format format);
194195
const char* common_reasoning_format_name(common_reasoning_format format);

common/common.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
#include "common.h"
1313
// Change JSON_ASSERT from assert() to GGML_ASSERT:
1414
#define JSON_ASSERT GGML_ASSERT
15-
#include "json.hpp"
1615
#include "llama-vocab.h"
1716
#include "llama.h"
1817
#include "chat.h"
@@ -899,7 +898,16 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
899898
}
900899
if (arg == "--mmproj") {
901900
CHECK_ARG
902-
params.mmproj = argv[i];
901+
params.mmproj.path = argv[i];
902+
return true;
903+
}
904+
if (arg == "--mmproj-url") {
905+
CHECK_ARG
906+
params.mmproj.url = argv[i];
907+
return true;
908+
}
909+
if (arg == "--no-mmproj-offload") {
910+
params.mmproj_use_gpu = false;
903911
return true;
904912
}
905913
if (arg == "--image") {

common/common.h

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,29 @@ struct llama_control_vector_load_info;
6868
int32_t cpu_get_num_physical_cores();
6969
int32_t cpu_get_num_math();
7070

71+
enum llama_example {
72+
LLAMA_EXAMPLE_COMMON,
73+
LLAMA_EXAMPLE_SPECULATIVE,
74+
LLAMA_EXAMPLE_MAIN,
75+
LLAMA_EXAMPLE_EMBEDDING,
76+
LLAMA_EXAMPLE_PERPLEXITY,
77+
LLAMA_EXAMPLE_RETRIEVAL,
78+
LLAMA_EXAMPLE_PASSKEY,
79+
LLAMA_EXAMPLE_IMATRIX,
80+
LLAMA_EXAMPLE_BENCH,
81+
LLAMA_EXAMPLE_SERVER,
82+
LLAMA_EXAMPLE_CVECTOR_GENERATOR,
83+
LLAMA_EXAMPLE_EXPORT_LORA,
84+
LLAMA_EXAMPLE_MTMD,
85+
LLAMA_EXAMPLE_LOOKUP,
86+
LLAMA_EXAMPLE_PARALLEL,
87+
LLAMA_EXAMPLE_TTS,
88+
LLAMA_EXAMPLE_DIFFUSION,
89+
LLAMA_EXAMPLE_FINETUNE,
90+
91+
LLAMA_EXAMPLE_COUNT,
92+
};
93+
7194
//
7295
// CLI argument parsing
7396
//
@@ -86,6 +109,14 @@ enum common_reasoning_format {
86109
COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
87110
};
88111

112+
struct model_paths {
113+
std::string path = ""; // model local path // NOLINT
114+
std::string url = ""; // model url to download // NOLINT
115+
std::string hf_repo = ""; // HF repo // NOLINT
116+
std::string hf_file = ""; // HF file // NOLINT
117+
std::string docker_repo = ""; // Docker repo // NOLINT
118+
};
119+
89120
struct gpt_params {
90121
uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
91122

@@ -230,8 +261,10 @@ struct gpt_params {
230261
std::string cache_type_k_draft = ""; // KV cache data type for K for the draft model
231262
std::string cache_type_v_draft = ""; // KV cache data type for V for the draft model
232263

233-
// multimodal models (see examples/llava)
234-
std::string mmproj = ""; // path to multimodal projector
264+
// multimodal models (see examples/mtmd)
265+
model_paths mmproj;
266+
bool mmproj_use_gpu = true; // use GPU for multimodal model
267+
bool no_mmproj = false; // explicitly disable multimodal model
235268
std::vector<std::string> image; // path to image file(s)
236269

237270
// embedding

common/json-partial.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
#include "log.h"
44
#include <string>
55

6-
#include <json.hpp>
7-
86
using json = nlohmann::ordered_json;
97

108
enum common_json_stack_element_type {

common/json-partial.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#pragma once
2-
#include <json.hpp>
2+
#include <nlohmann/json.hpp>
33

44
// Healing marker (empty if the JSON was fully parsed / wasn't healed).
55
struct common_healing_marker {

common/json-schema-to-grammar.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#include "ggml.h"
44
// Change JSON_ASSERT from assert() to GGML_ASSERT:
55
#define JSON_ASSERT GGML_ASSERT
6-
#include "json.hpp"
6+
#include <nlohmann/json.hpp>
77

88
std::string json_schema_to_grammar(const nlohmann::ordered_json & schema,
99
bool force_gbnf = false);

common/sampling.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#include "llama-vocab.h"
44
#include "common.h"
55
#include <random>
6-
#include "json.hpp"
6+
#include <nlohmann/json.hpp>
77
using json = nlohmann::ordered_json;
88

99
struct llama_sampling_context * llama_sampling_init(const struct llama_vocab* vocab, const struct llama_sampling_params & params) {

0 commit comments

Comments (0)