Commit 1bc35c3

Merge branch 'master' into finelayer
2 parents 6b51522 + 053b153

134 files changed (+6577 / -4909 lines)

Some content is hidden: large commits have part of their diff hidden by default, so only a subset of the 134 changed files is shown below.

.editorconfig

Lines changed: 1 addition & 1 deletion
@@ -49,6 +49,6 @@ charset = unset
 trim_trailing_whitespace = unset
 insert_final_newline = unset

-[tools/mtmd/miniaudio.h]
+[vendor/miniaudio/miniaudio.h]
 trim_trailing_whitespace = unset
 insert_final_newline = unset

.github/workflows/build-linux-cross.yml

Lines changed: 15 additions & 15 deletions
@@ -26,12 +26,12 @@ jobs:
           sudo apt-get install -y --no-install-recommends \
                   build-essential \
                   gcc-14-riscv64-linux-gnu \
-                  g++-14-riscv64-linux-gnu \
-                  libcurl4-openssl-dev:riscv64
+                  g++-14-riscv64-linux-gnu

       - name: Build
         run: |
-          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+          cmake -B build -DLLAMA_CURL=OFF \
+                         -DCMAKE_BUILD_TYPE=Release \
                          -DGGML_OPENMP=OFF \
                          -DLLAMA_BUILD_EXAMPLES=ON \
                          -DLLAMA_BUILD_TOOLS=ON \
@@ -72,12 +72,12 @@ jobs:
                   glslc \
                   gcc-14-riscv64-linux-gnu \
                   g++-14-riscv64-linux-gnu \
-                  libvulkan-dev:riscv64 \
-                  libcurl4-openssl-dev:riscv64
+                  libvulkan-dev:riscv64

       - name: Build
         run: |
-          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+          cmake -B build -DLLAMA_CURL=OFF \
+                         -DCMAKE_BUILD_TYPE=Release \
                          -DGGML_VULKAN=ON \
                          -DGGML_OPENMP=OFF \
                          -DLLAMA_BUILD_EXAMPLES=ON \
@@ -118,12 +118,12 @@ jobs:
                   build-essential \
                   glslc \
                   crossbuild-essential-arm64 \
-                  libvulkan-dev:arm64 \
-                  libcurl4-openssl-dev:arm64
+                  libvulkan-dev:arm64

       - name: Build
         run: |
-          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+          cmake -B build -DLLAMA_CURL=OFF \
+                         -DCMAKE_BUILD_TYPE=Release \
                          -DGGML_VULKAN=ON \
                          -DGGML_OPENMP=OFF \
                          -DLLAMA_BUILD_EXAMPLES=ON \
@@ -163,12 +163,12 @@ jobs:
           sudo apt-get install -y --no-install-recommends \
                   build-essential \
                   gcc-14-powerpc64le-linux-gnu \
-                  g++-14-powerpc64le-linux-gnu \
-                  libcurl4-openssl-dev:ppc64el
+                  g++-14-powerpc64le-linux-gnu

       - name: Build
         run: |
-          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+          cmake -B build -DLLAMA_CURL=OFF \
+                         -DCMAKE_BUILD_TYPE=Release \
                          -DGGML_OPENMP=OFF \
                          -DLLAMA_BUILD_EXAMPLES=ON \
                          -DLLAMA_BUILD_TOOLS=ON \
@@ -209,12 +209,12 @@ jobs:
                   glslc \
                   gcc-14-powerpc64le-linux-gnu \
                   g++-14-powerpc64le-linux-gnu \
-                  libvulkan-dev:ppc64el \
-                  libcurl4-openssl-dev:ppc64el
+                  libvulkan-dev:ppc64el

       - name: Build
         run: |
-          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+          cmake -B build -DLLAMA_CURL=OFF \
+                         -DCMAKE_BUILD_TYPE=Release \
                          -DGGML_VULKAN=ON \
                          -DGGML_OPENMP=OFF \
                          -DLLAMA_BUILD_EXAMPLES=ON \

common/CMakeLists.txt

Lines changed: 5 additions & 8 deletions
@@ -58,23 +58,20 @@ add_library(${TARGET} STATIC
     arg.cpp
     arg.h
     base64.hpp
-    chat.cpp
-    chat.h
     chat-parser.cpp
     chat-parser.h
+    chat.cpp
+    chat.h
     common.cpp
     common.h
     console.cpp
     console.h
-    json-schema-to-grammar.cpp
-    json.hpp
-    json-partial.h
     json-partial.cpp
+    json-partial.h
+    json-schema-to-grammar.cpp
     llguidance.cpp
     log.cpp
     log.h
-    minja/chat-template.hpp
-    minja/minja.hpp
     ngram-cache.cpp
     ngram-cache.h
     regex-partial.cpp
@@ -147,7 +144,7 @@ if (LLAMA_LLGUIDANCE)
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
 endif ()

-target_include_directories(${TARGET} PUBLIC .)
+target_include_directories(${TARGET} PUBLIC . ../vendor)
 target_compile_features   (${TARGET} PUBLIC cxx_std_17)
 target_link_libraries     (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)

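
Adding ../vendor to the common target's public include directories is what lets the sources below drop the in-tree json.hpp and minja/*.hpp copies in favour of vendored headers. A minimal sketch of the resulting include style in a common/ source file, assuming the vendor/nlohmann and vendor/minja layout implied by the other hunks in this commit (the file itself is hypothetical):

// Hypothetical example file; it only illustrates the include style the new
// PUBLIC include path enables, it is not part of the commit.
#include "ggml.h"                // provides GGML_ASSERT

#define JSON_ASSERT GGML_ASSERT  // route nlohmann's asserts through GGML_ASSERT, as common/arg.cpp does below
#include <nlohmann/json.hpp>     // resolved under vendor/nlohmann/
#include <minja/minja.hpp>       // resolved under vendor/minja/

using json = nlohmann::ordered_json;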

common/arg.cpp

Lines changed: 21 additions & 16 deletions
@@ -16,6 +16,9 @@
 # include <windows.h>
 #endif

+#define JSON_ASSERT GGML_ASSERT
+#include <nlohmann/json.hpp>
+
 #include <algorithm>
 #include <climits>
 #include <cstdarg>
@@ -36,8 +39,6 @@
 # include <future>
 #endif

-#include "json-schema-to-grammar.h"
-
 using json = nlohmann::ordered_json;

 std::initializer_list<enum llama_example> mmproj_examples = {
@@ -1300,6 +1301,7 @@ common_params_context common_params_parser_init(common_params & params,
     sampler_type_names.pop_back();

     params.optimize = ggml_opt_get_default_optimizer_params(NULL);
+    params.optimize.lr = 1e-8;

     /**
      * filter options by example
@@ -1385,19 +1387,22 @@
     add_opt(common_arg(
         {"--prio"}, "N",
         string_format("set process/thread priority : low(-1), normal(0), medium(1), high(2), realtime(3) (default: %d)\n", params.cpuparams.priority),
-        [](common_params & params, int prio) {
-            if (prio < 0 || prio > 3) {
-                throw std::invalid_argument("invalid value");
-            }
-            params.cpuparams.priority = (enum ggml_sched_priority) prio;
-        }));
-    add_opt(common_arg({ "--poll" },
-        "<0...100>",
-        string_format(
-            "use polling level to wait for work (0 - no polling, default: %u)\n", (unsigned) params.cpuparams.poll),
-        [](common_params & params, const std::string & value) { params.cpuparams.poll = std::stoul(value); }));
-    add_opt(common_arg({ "-Cb", "--cpu-mask-batch" },
-        "M",
+        [](common_params & params, int prio) {
+            if (prio < GGML_SCHED_PRIO_LOW || prio > GGML_SCHED_PRIO_REALTIME) {
+                throw std::invalid_argument("invalid value");
+            }
+            params.cpuparams.priority = (enum ggml_sched_priority) prio;
+        }
+    ));
+    add_opt(common_arg(
+        {"--poll"}, "<0...100>",
+        string_format("use polling level to wait for work (0 - no polling, default: %u)\n", (unsigned) params.cpuparams.poll),
+        [](common_params & params, const std::string & value) {
+            params.cpuparams.poll = std::stoul(value);
+        }
+    ));
+    add_opt(common_arg(
+        {"-Cb", "--cpu-mask-batch"}, "M",
         "CPU affinity mask: arbitrarily long hex. Complements cpu-range-batch (default: same as --cpu-mask)",
         [](common_params & params, const std::string & mask) {
            params.cpuparams_batch.mask_valid = true;
@@ -2138,7 +2143,7 @@ common_params_context common_params_parser_init(common_params & params,
         .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
     add_opt(common_arg({ "-opt", "--optimizer" },
         "sgd|adamw",
-        "adamw or //TODO:sgd",
+        "adamw or sgd",
         [](common_params & params, const std::string & name) {
            params.optimize.optimizer = ggml_opt_get_optimizer(name.c_str());
            if (params.optimize.optimizer == GGML_OPT_OPTIMIZER_COUNT) {
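
The tightened --prio bounds check above compares against enum values instead of the literals 0 and 3. The enum itself is not part of this excerpt; the sketch below shows the values implied by the help text ("low(-1) ... realtime(3)") and is an assumption, not a quote of ggml's header:

// Assumed layout of ggml's scheduler priority enum; inferred from the
// "--prio" help string above, not copied from the diff.
enum ggml_sched_priority {
    GGML_SCHED_PRIO_LOW      = -1,
    GGML_SCHED_PRIO_NORMAL   =  0,
    GGML_SCHED_PRIO_MEDIUM   =  1,
    GGML_SCHED_PRIO_HIGH     =  2,
    GGML_SCHED_PRIO_REALTIME =  3,
};

// With these values the new check accepts exactly the documented range -1..3:
static bool prio_is_valid(int prio) {
    return prio >= GGML_SCHED_PRIO_LOW && prio <= GGML_SCHED_PRIO_REALTIME;
}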

common/chat-parser.cpp

Lines changed: 4 additions & 3 deletions
@@ -154,9 +154,10 @@ bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think
             if (!rest.empty()) {
                 handle_reasoning(rest, /* closed */ !is_partial());
             }
-            if (!syntax_.thinking_forced_open) {
-                throw common_chat_msg_partial_exception(end_think);
-            }
+            // Allow unclosed thinking tags, for now (https://github.com/ggml-org/llama.cpp/issues/13812, https://github.com/ggml-org/llama.cpp/issues/13877)
+            // if (!syntax_.thinking_forced_open) {
+            //     throw common_chat_msg_partial_exception(end_think);
+            // }
             return true;
         }
     }

common/chat-parser.h

Lines changed: 2 additions & 1 deletion
@@ -2,9 +2,10 @@

 #include "chat.h"
 #include "json-partial.h"
-#include "json.hpp"
 #include "regex-partial.h"

+#include <nlohmann/json.hpp>
+
 #include <optional>
 #include <string>
 #include <vector>

common/chat.cpp

Lines changed: 4 additions & 4 deletions
@@ -1,13 +1,14 @@
 #include "chat.h"
 #include "chat-parser.h"
 #include "common.h"
+#include "json-partial.h"
 #include "json-schema-to-grammar.h"
 #include "log.h"
-#include "json-partial.h"
-#include "minja/chat-template.hpp"
-#include "minja/minja.hpp"
 #include "regex-partial.h"

+#include <minja/chat-template.hpp>
+#include <minja/minja.hpp>
+
 #include <cstdio>
 #include <exception>
 #include <iostream>
@@ -16,7 +17,6 @@
 #include <string>
 #include <vector>

-
 static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
     auto time = std::chrono::system_clock::to_time_t(now);
     auto local_time = *std::localtime(&time);

common/common.cpp

Lines changed: 2 additions & 0 deletions
@@ -203,6 +203,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {

     DWORD p = NORMAL_PRIORITY_CLASS;
     switch (prio) {
+        case GGML_SCHED_PRIO_LOW:      p = BELOW_NORMAL_PRIORITY_CLASS; break;
         case GGML_SCHED_PRIO_NORMAL:   p = NORMAL_PRIORITY_CLASS;       break;
         case GGML_SCHED_PRIO_MEDIUM:   p = ABOVE_NORMAL_PRIORITY_CLASS; break;
         case GGML_SCHED_PRIO_HIGH:     p = HIGH_PRIORITY_CLASS;         break;
@@ -228,6 +229,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {

     int p = 0;
     switch (prio) {
+        case GGML_SCHED_PRIO_LOW:      p =  5; break;
         case GGML_SCHED_PRIO_NORMAL:   p =  0; break;
         case GGML_SCHED_PRIO_MEDIUM:   p = -5; break;
         case GGML_SCHED_PRIO_HIGH:     p = -10; break;
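
For context, a minimal standalone sketch of what the two new GGML_SCHED_PRIO_LOW cases map to on each platform; the real set_process_priority() keeps its existing switch, logging, and return handling, which are omitted here:

#ifdef _WIN32
#include <windows.h>
// LOW maps to BELOW_NORMAL_PRIORITY_CLASS, one step under the default class.
static bool set_low_process_priority() {
    return SetPriorityClass(GetCurrentProcess(), BELOW_NORMAL_PRIORITY_CLASS) != 0;
}
#else
#include <sys/resource.h>
// LOW maps to nice value 5 (a higher nice value means lower scheduling priority than the default 0).
static bool set_low_process_priority() {
    return setpriority(PRIO_PROCESS, 0, 5) == 0;
}
#endif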

common/json-partial.cpp

Lines changed: 5 additions & 4 deletions
@@ -1,9 +1,10 @@
-#include <json-partial.h>
-#include "ggml.h"
+#include "json-partial.h"
+
 #include "log.h"
-#include <string>

-#include <json.hpp>
+#include <nlohmann/json.hpp>
+
+#include <string>

 using json = nlohmann::ordered_json;


common/json-partial.h

Lines changed: 2 additions & 1 deletion
@@ -1,5 +1,6 @@
 #pragma once
-#include <json.hpp>
+
+#include <nlohmann/json.hpp>

 // Healing marker (empty if the JSON was fully parsed / wasn't healed).
 struct common_healing_marker {

0 commit comments
