Skip to content

Commit 2946277

Browse files
authored
Merge branch 'master' into cisc/fix-nomic-bert-moe-mask
2 parents a835a0c + c496fe0 commit 2946277

File tree

121 files changed

+8237
-7190
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

121 files changed

+8237
-7190
lines changed

.editorconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,6 @@ charset = unset
4949
trim_trailing_whitespace = unset
5050
insert_final_newline = unset
5151

52-
[tools/mtmd/vendor/miniaudio.h]
52+
[vendor/miniaudio/miniaudio.h]
5353
trim_trailing_whitespace = unset
5454
insert_final_newline = unset

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
130130
<details>
131131
<summary>Bindings</summary>
132132

133+
- Python: [ddh0/easy-llama](https://github.com/ddh0/easy-llama)
133134
- Python: [abetlen/llama-cpp-python](https://github.com/abetlen/llama-cpp-python)
134135
- Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
135136
- Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp)

common/CMakeLists.txt

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,23 +58,20 @@ add_library(${TARGET} STATIC
5858
arg.cpp
5959
arg.h
6060
base64.hpp
61-
chat.cpp
62-
chat.h
6361
chat-parser.cpp
6462
chat-parser.h
63+
chat.cpp
64+
chat.h
6565
common.cpp
6666
common.h
6767
console.cpp
6868
console.h
69-
json-schema-to-grammar.cpp
70-
json.hpp
71-
json-partial.h
7269
json-partial.cpp
70+
json-partial.h
71+
json-schema-to-grammar.cpp
7372
llguidance.cpp
7473
log.cpp
7574
log.h
76-
minja/chat-template.hpp
77-
minja/minja.hpp
7875
ngram-cache.cpp
7976
ngram-cache.h
8077
regex-partial.cpp
@@ -147,7 +144,7 @@ if (LLAMA_LLGUIDANCE)
147144
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
148145
endif ()
149146

150-
target_include_directories(${TARGET} PUBLIC .)
147+
target_include_directories(${TARGET} PUBLIC . ../vendor)
151148
target_compile_features (${TARGET} PUBLIC cxx_std_17)
152149
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
153150

common/arg.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
#include "gguf.h" // for reading GGUF splits
21
#include "arg.h"
32

3+
#include "chat.h"
44
#include "common.h"
5+
#include "gguf.h" // for reading GGUF splits
6+
#include "json-schema-to-grammar.h"
57
#include "log.h"
68
#include "sampling.h"
7-
#include "chat.h"
89

910
// fix problem with std::min and std::max
1011
#if defined(_WIN32)
@@ -15,6 +16,9 @@
1516
#include <windows.h>
1617
#endif
1718

19+
#define JSON_ASSERT GGML_ASSERT
20+
#include <nlohmann/json.hpp>
21+
1822
#include <algorithm>
1923
#include <climits>
2024
#include <cstdarg>
@@ -34,8 +38,6 @@
3438
#include <future>
3539
#endif
3640

37-
#include "json-schema-to-grammar.h"
38-
3941
using json = nlohmann::ordered_json;
4042

4143
std::initializer_list<enum llama_example> mmproj_examples = {
@@ -1346,9 +1348,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
13461348
));
13471349
add_opt(common_arg(
13481350
{"--prio"}, "N",
1349-
string_format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority),
1351+
string_format("set process/thread priority : low(-1), normal(0), medium(1), high(2), realtime(3) (default: %d)\n", params.cpuparams.priority),
13501352
[](common_params & params, int prio) {
1351-
if (prio < 0 || prio > 3) {
1353+
if (prio < GGML_SCHED_PRIO_LOW || prio > GGML_SCHED_PRIO_REALTIME) {
13521354
throw std::invalid_argument("invalid value");
13531355
}
13541356
params.cpuparams.priority = (enum ggml_sched_priority) prio;

common/chat-parser.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,10 @@ bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think
154154
if (!rest.empty()) {
155155
handle_reasoning(rest, /* closed */ !is_partial());
156156
}
157-
if (!syntax_.thinking_forced_open) {
158-
throw common_chat_msg_partial_exception(end_think);
159-
}
157+
// Allow unclosed thinking tags, for now (https://github.com/ggml-org/llama.cpp/issues/13812, https://github.com/ggml-org/llama.cpp/issues/13877)
158+
// if (!syntax_.thinking_forced_open) {
159+
// throw common_chat_msg_partial_exception(end_think);
160+
// }
160161
return true;
161162
}
162163
}

common/chat-parser.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22

33
#include "chat.h"
44
#include "json-partial.h"
5-
#include "json.hpp"
65
#include "regex-partial.h"
76

7+
#include <nlohmann/json.hpp>
8+
89
#include <optional>
910
#include <string>
1011
#include <vector>

common/chat.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
#include "chat.h"
22
#include "chat-parser.h"
33
#include "common.h"
4+
#include "json-partial.h"
45
#include "json-schema-to-grammar.h"
56
#include "log.h"
6-
#include "json-partial.h"
7-
#include "minja/chat-template.hpp"
8-
#include "minja/minja.hpp"
97
#include "regex-partial.h"
108

9+
#include <minja/chat-template.hpp>
10+
#include <minja/minja.hpp>
11+
1112
#include <cstdio>
1213
#include <exception>
1314
#include <iostream>
@@ -16,7 +17,6 @@
1617
#include <string>
1718
#include <vector>
1819

19-
2020
static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
2121
auto time = std::chrono::system_clock::to_time_t(now);
2222
auto local_time = *std::localtime(&time);

common/common.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
203203

204204
DWORD p = NORMAL_PRIORITY_CLASS;
205205
switch (prio) {
206+
case GGML_SCHED_PRIO_LOW: p = BELOW_NORMAL_PRIORITY_CLASS; break;
206207
case GGML_SCHED_PRIO_NORMAL: p = NORMAL_PRIORITY_CLASS; break;
207208
case GGML_SCHED_PRIO_MEDIUM: p = ABOVE_NORMAL_PRIORITY_CLASS; break;
208209
case GGML_SCHED_PRIO_HIGH: p = HIGH_PRIORITY_CLASS; break;
@@ -228,6 +229,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
228229

229230
int p = 0;
230231
switch (prio) {
232+
case GGML_SCHED_PRIO_LOW: p = 5; break;
231233
case GGML_SCHED_PRIO_NORMAL: p = 0; break;
232234
case GGML_SCHED_PRIO_MEDIUM: p = -5; break;
233235
case GGML_SCHED_PRIO_HIGH: p = -10; break;

common/json-partial.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
#include <json-partial.h>
2-
#include "ggml.h"
1+
#include "json-partial.h"
2+
33
#include "log.h"
4-
#include <string>
54

6-
#include <json.hpp>
5+
#include <nlohmann/json.hpp>
6+
7+
#include <string>
78

89
using json = nlohmann::ordered_json;
910

common/json-partial.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
2-
#include <json.hpp>
2+
3+
#include <nlohmann/json.hpp>
34

45
// Healing marker (empty if the JSON was fully parsed / wasn't healed).
56
struct common_healing_marker {

0 commit comments

Comments
 (0)