ggml-org
diff --git a/.editorconfig b/.editorconfig
Lines changed: 1 addition & 1 deletion
diff --git a/README.md b/README.md
Lines changed: 1 addition & 0 deletions
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
Lines changed: 5 additions & 8 deletions
diff --git a/common/arg.cpp b/common/arg.cpp
Lines changed: 8 additions & 6 deletions
diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp
Lines changed: 4 additions & 3 deletions
diff --git a/common/chat-parser.h b/common/chat-parser.h
Lines changed: 2 additions & 1 deletion
diff --git a/common/chat.cpp b/common/chat.cpp
Lines changed: 4 additions & 4 deletions
diff --git a/common/common.cpp b/common/common.cpp
Lines changed: 2 additions & 0 deletions
diff --git a/common/json-partial.cpp b/common/json-partial.cpp
Lines changed: 5 additions & 4 deletions
diff --git a/common/json-partial.h b/common/json-partial.h
Lines changed: 2 additions & 1 deletion
@@ -49,6 +49,6 @@ charset = unset
 trim_trailing_whitespace = unset
 insert_final_newline = unset
 
-[tools/mtmd/vendor/miniaudio.h]
+[vendor/miniaudio/miniaudio.h]
 trim_trailing_whitespace = unset
 insert_final_newline = unset
@@ -130,6 +130,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 <details>
 <summary>Bindings</summary>
 
+- Python: [ddh0/easy-llama](https://github.com/ddh0/easy-llama)
 - Python: [abetlen/llama-cpp-python](https://github.com/abetlen/llama-cpp-python)
 - Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
 - Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp)
 
@@ -58,23 +58,20 @@ add_library(${TARGET} STATIC
     arg.cpp
     arg.h
     base64.hpp
-    chat.cpp
-    chat.h
     chat-parser.cpp
     chat-parser.h
+    chat.cpp
+    chat.h
     common.cpp
     common.h
     console.cpp
     console.h
-    json-schema-to-grammar.cpp
-    json.hpp
-    json-partial.h
     json-partial.cpp
+    json-partial.h
+    json-schema-to-grammar.cpp
     llguidance.cpp
     log.cpp
     log.h
-    minja/chat-template.hpp
-    minja/minja.hpp
     ngram-cache.cpp
     ngram-cache.h
     regex-partial.cpp
@@ -147,7 +144,7 @@ if (LLAMA_LLGUIDANCE)
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
 endif ()
 
-target_include_directories(${TARGET} PUBLIC .)
+target_include_directories(${TARGET} PUBLIC . ../vendor)
 target_compile_features   (${TARGET} PUBLIC cxx_std_17)
 target_link_libraries     (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
 
 
@@ -1,10 +1,11 @@
-#include "gguf.h" // for reading GGUF splits
 #include "arg.h"
 
+#include "chat.h"
 #include "common.h"
+#include "gguf.h" // for reading GGUF splits
+#include "json-schema-to-grammar.h"
 #include "log.h"
 #include "sampling.h"
-#include "chat.h"
 
 // fix problem with std::min and std::max
 #if defined(_WIN32)
@@ -15,6 +16,9 @@
 #include <windows.h>
 #endif
 
+#define JSON_ASSERT GGML_ASSERT
+#include <nlohmann/json.hpp>
+
 #include <algorithm>
 #include <climits>
 #include <cstdarg>
@@ -34,8 +38,6 @@
 #include <future>
 #endif
 
-#include "json-schema-to-grammar.h"
-
 using json = nlohmann::ordered_json;
 
 std::initializer_list<enum llama_example> mmproj_examples = {
@@ -1346,9 +1348,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ));
     add_opt(common_arg(
         {"--prio"}, "N",
-        string_format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority),
+        string_format("set process/thread priority : low(-1), normal(0), medium(1), high(2), realtime(3) (default: %d)\n", params.cpuparams.priority),
         [](common_params & params, int prio) {
-            if (prio < 0 || prio > 3) {
+            if (prio < GGML_SCHED_PRIO_LOW || prio > GGML_SCHED_PRIO_REALTIME) {
                 throw std::invalid_argument("invalid value");
             }
             params.cpuparams.priority = (enum ggml_sched_priority) prio;
 
@@ -154,9 +154,10 @@ bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think
             if (!rest.empty()) {
                 handle_reasoning(rest, /* closed */ !is_partial());
             }
-            if (!syntax_.thinking_forced_open) {
-                throw common_chat_msg_partial_exception(end_think);
-            }
+            // Allow unclosed thinking tags, for now (https://github.com/ggml-org/llama.cpp/issues/13812, https://github.com/ggml-org/llama.cpp/issues/13877)
+            // if (!syntax_.thinking_forced_open) {
+            //     throw common_chat_msg_partial_exception(end_think);
+            // }
             return true;
         }
     }
 
@@ -2,9 +2,10 @@
 
 #include "chat.h"
 #include "json-partial.h"
-#include "json.hpp"
 #include "regex-partial.h"
 
+#include <nlohmann/json.hpp>
+
 #include <optional>
 #include <string>
 #include <vector>
 
@@ -1,13 +1,14 @@
 #include "chat.h"
 #include "chat-parser.h"
 #include "common.h"
+#include "json-partial.h"
 #include "json-schema-to-grammar.h"
 #include "log.h"
-#include "json-partial.h"
-#include "minja/chat-template.hpp"
-#include "minja/minja.hpp"
 #include "regex-partial.h"
 
+#include <minja/chat-template.hpp>
+#include <minja/minja.hpp>
+
 #include <cstdio>
 #include <exception>
 #include <iostream>
@@ -16,7 +17,6 @@
 #include <string>
 #include <vector>
 
-
 static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
     auto time = std::chrono::system_clock::to_time_t(now);
     auto local_time = *std::localtime(&time);
 
@@ -203,6 +203,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
 
     DWORD p = NORMAL_PRIORITY_CLASS;
     switch (prio) {
+        case GGML_SCHED_PRIO_LOW:      p = BELOW_NORMAL_PRIORITY_CLASS; break;
         case GGML_SCHED_PRIO_NORMAL:   p = NORMAL_PRIORITY_CLASS;       break;
         case GGML_SCHED_PRIO_MEDIUM:   p = ABOVE_NORMAL_PRIORITY_CLASS; break;
         case GGML_SCHED_PRIO_HIGH:     p = HIGH_PRIORITY_CLASS;         break;
@@ -228,6 +229,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
 
     int p = 0;
     switch (prio) {
+        case GGML_SCHED_PRIO_LOW:      p =  5;  break;
         case GGML_SCHED_PRIO_NORMAL:   p =  0;  break;
         case GGML_SCHED_PRIO_MEDIUM:   p = -5;  break;
         case GGML_SCHED_PRIO_HIGH:     p = -10; break;
 
@@ -1,9 +1,10 @@
-#include <json-partial.h>
-#include "ggml.h"
+#include "json-partial.h"
+
 #include "log.h"
-#include <string>
 
-#include <json.hpp>
+#include <nlohmann/json.hpp>
+
+#include <string>
 
 using json = nlohmann::ordered_json;
 
 
@@ -1,5 +1,6 @@
 #pragma once
-#include <json.hpp>
+
+#include <nlohmann/json.hpp>
 
 // Healing marker (empty if the JSON was fully parsed / wasn't healed).
 struct common_healing_marker {
@@ -154,9 +154,10 @@ bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think
             if (!rest.empty()) {
                 handle_reasoning(rest, /* closed */ !is_partial());
             }
-            if (!syntax_.thinking_forced_open) {
-                throw common_chat_msg_partial_exception(end_think);
-            }
+            // Allow unclosed thinking tags, for now (https://github.com/ggml-org/llama.cpp/issues/13812, https://github.com/ggml-org/llama.cpp/issues/13877)
+            // if (!syntax_.thinking_forced_open) {
+            //     throw common_chat_msg_partial_exception(end_think);
+            // }
             return true;
         }
     }