Skip to content

Commit 2476e13

Browse files
authored
Merge branch 'ggml-org:master' into master
2 parents bbb592d + 7adc79c commit 2476e13

File tree

131 files changed

+18228
-7059
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

131 files changed

+18228
-7059
lines changed

.github/workflows/build.yml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,39 @@ jobs:
387387
cd build
388388
ctest -L main --verbose
389389
390+
ubuntu-24-cmake-vulkan-deb:
391+
runs-on: ubuntu-24.04
392+
393+
steps:
394+
- name: Clone
395+
id: checkout
396+
uses: actions/checkout@v4
397+
398+
- name: ccache
399+
uses: ggml-org/[email protected]
400+
with:
401+
key: ubuntu-24-cmake-vulkan-deb
402+
evict-old-files: 1d
403+
404+
- name: Dependencies
405+
id: depends
406+
run: |
407+
sudo apt-get install -y glslc libvulkan-dev libcurl4-openssl-dev
408+
409+
- name: Configure
410+
id: cmake_configure
411+
run: |
412+
cmake -B build \
413+
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
414+
-DGGML_BACKEND_DL=ON \
415+
-DGGML_CPU_ALL_VARIANTS=ON \
416+
-DGGML_VULKAN=ON
417+
418+
- name: Build
419+
id: cmake_build
420+
run: |
421+
cmake --build build -j $(nproc)
422+
390423
ubuntu-24-cmake-vulkan:
391424
runs-on: ubuntu-24.04
392425

common/arg.cpp

Lines changed: 170 additions & 134 deletions
Large diffs are not rendered by default.

common/chat-parser.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parse
432432
if (is_arguments_path({})) {
433433
// Entire JSON is the arguments and was parsed fully.
434434
return consume_json_result {
435-
partial->json.dump(),
435+
partial->json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true),
436436
/* .is_partial = */ false,
437437
};
438438
}
@@ -444,7 +444,7 @@ std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parse
444444
std::vector<std::string> path;
445445
std::function<json(const json &)> remove_unsupported_healings_and_dump_args = [&](const json & j) -> json {
446446
if (is_arguments_path(path)) {
447-
auto arguments = j.dump();
447+
auto arguments = j.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true);
448448
if (is_partial() && !partial->healing_marker.marker.empty()) {
449449
auto idx = arguments.find(partial->healing_marker.json_dump_marker);
450450
if (idx != std::string::npos) {

common/chat.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ struct common_chat_msg_content_part {
3333
struct common_chat_msg {
3434
std::string role;
3535
std::string content;
36-
std::vector<common_chat_msg_content_part> content_parts = {};
37-
std::vector<common_chat_tool_call> tool_calls = {};
36+
std::vector<common_chat_msg_content_part> content_parts;
37+
std::vector<common_chat_tool_call> tool_calls;
3838
std::string reasoning_content;
3939
std::string tool_name;
4040
std::string tool_call_id;
@@ -44,7 +44,7 @@ struct common_chat_msg {
4444
bool empty() const {
4545
return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
4646
}
47-
void ensure_tool_call_ids_set(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
47+
void set_tool_call_ids(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
4848
for (auto i = 0u; i < tool_calls.size(); i++) {
4949
if (ids_cache.size() <= i) {
5050
auto id = tool_calls[i].id;

common/common.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ struct common_params {
378378
bool simple_io = false; // improves compatibility with subprocesses and limited consoles
379379
bool cont_batching = true; // insert new sequences for decoding on-the-fly
380380
bool no_perf = false; // disable performance metrics
381-
bool ctx_shift = false; // context shift on infinite text generation
381+
bool ctx_shift = false; // context shift on infinite text generation
382382
bool swa_full = false; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
383383
bool kv_unified = false; // enable unified KV cache
384384

@@ -425,7 +425,8 @@ struct common_params {
425425
int32_t timeout_write = timeout_read; // http write timeout in seconds
426426
int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
427427
int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting
428-
int32_t n_ctx_checkpoints = 3; // max number of context checkpoints per slot
428+
int32_t n_ctx_checkpoints = 8; // max number of context checkpoints per slot
429+
int32_t cache_ram_mib = 8192; // -1 = no limit, 0 = disable, 1 = 1 MiB, etc.
429430

430431
std::string hostname = "127.0.0.1";
431432
std::string public_path = ""; // NOLINT

common/json-partial.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <nlohmann/json.hpp>
66

77
#include <string>
8+
#include <regex>
89

910
using json = nlohmann::ordered_json;
1011

@@ -168,6 +169,47 @@ bool common_json_parse(
168169
}
169170
}
170171

172+
// Matches a potentially partial unicode escape sequence, e.g. \u, \uX, \uXX, \uXXX, \uXXXX
173+
static const std::regex partial_unicode_regex(R"(\\u(?:[0-9a-fA-F](?:[0-9a-fA-F](?:[0-9a-fA-F](?:[0-9a-fA-F])?)?)?)?$)");
174+
175+
auto is_high_surrogate = [&](const std::string & s) {
176+
// Check whether s starts with a (possibly partial) high surrogate escape (U+D800-U+DBFF)
177+
return s.length() >= 4 &&
178+
s[0] == '\\' && s[1] == 'u' &&
179+
std::tolower(s[2]) == 'd' &&
180+
(s[3] == '8' || s[3] == '9' || std::tolower(s[3]) == 'a' || std::tolower(s[3]) == 'b');
181+
};
182+
183+
// Initialize the unicode marker to a low surrogate to handle the edge case
184+
// where a high surrogate (U+D800-U+DBFF) is immediately followed by a
185+
// backslash (\)
186+
std::string unicode_marker_padding = "udc00";
187+
std::smatch last_unicode_seq;
188+
189+
if (std::regex_search(str, last_unicode_seq, partial_unicode_regex)) {
190+
std::smatch second_last_seq;
191+
std::string prelude = str.substr(0, last_unicode_seq.position());
192+
193+
// Pad the escape sequence with 0s until it forms a complete sequence of 6 characters
194+
unicode_marker_padding = std::string(6 - last_unicode_seq.length(), '0');
195+
196+
if (is_high_surrogate(last_unicode_seq.str())) {
197+
// If the sequence is a partial match for a high surrogate, add a low surrogate (U+DC00-U+DFFF)
198+
unicode_marker_padding += "\\udc00";
199+
} else if (std::regex_search(prelude, second_last_seq, partial_unicode_regex)) {
200+
if (is_high_surrogate(second_last_seq.str())) {
201+
// If this follows a high surrogate, pad it to be a low surrogate
202+
if (last_unicode_seq.length() == 2) {
203+
unicode_marker_padding = "dc00";
204+
} else if (last_unicode_seq.length() == 3) {
205+
unicode_marker_padding = "c00";
206+
} else {
207+
// The original unicode_marker_padding is already padded with 0s
208+
}
209+
}
210+
}
211+
}
212+
171213
const auto & magic_seed = out.healing_marker.marker = healing_marker;//"$llama.cpp.json$";
172214

173215
if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_KEY) {
@@ -186,6 +228,9 @@ bool common_json_parse(
186228
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
187229
// Was inside an object value string after an escape
188230
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
231+
} else if (can_parse(str + unicode_marker_padding + "\"" + closing)) {
232+
// Was inside an object value string after a partial unicode escape
233+
str += (out.healing_marker.json_dump_marker = unicode_marker_padding + magic_seed) + "\"" + closing;
189234
} else {
190235
// find last :
191236
auto last_pos = str.find_last_of(':');
@@ -205,6 +250,9 @@ bool common_json_parse(
205250
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
206251
// Was inside an array value string after an escape
207252
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
253+
} else if (can_parse(str + unicode_marker_padding + "\"" + closing)) {
254+
// Was inside an array value string after a partial unicode escape
255+
str += (out.healing_marker.json_dump_marker = unicode_marker_padding + magic_seed) + "\"" + closing;
208256
} else if (!was_maybe_number() && can_parse(str + ", 1" + closing)) {
209257
// Had just finished a value
210258
str += (out.healing_marker.json_dump_marker = ",\"" + magic_seed) + "\"" + closing;
@@ -230,6 +278,9 @@ bool common_json_parse(
230278
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\": 1" + closing)) {
231279
// Was inside an object key string after an escape
232280
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\": 1" + closing;
281+
} else if (can_parse(str + unicode_marker_padding + "\": 1" + closing)) {
282+
// Was inside an object key string after a partial unicode escape
283+
str += (out.healing_marker.json_dump_marker = unicode_marker_padding + magic_seed) + "\": 1" + closing;
233284
} else {
234285
auto last_pos = str.find_last_of(':');
235286
if (last_pos == std::string::npos) {

convert_hf_to_gguf.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5966,20 +5966,12 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
59665966
class JambaModel(TextModel):
59675967
model_arch = gguf.MODEL_ARCH.JAMBA
59685968

5969-
def get_vocab_base_pre(self, tokenizer) -> str:
5970-
del tokenizer # unused
5971-
5972-
return "gpt-2"
5973-
59745969
def set_vocab(self):
59755970
if (self.dir_model / "tokenizer.model").is_file():
5976-
# Using Jamba's tokenizer.json causes errors on model load
5977-
# (something about "byte not found in vocab"),
5978-
# but there's a working tokenizer.model
59795971
self._set_vocab_sentencepiece()
59805972
else:
5981-
# Some Jamba models only have a tokenizer.json, which works.
5982-
self._set_vocab_gpt2()
5973+
self._set_vocab_llama_hf()
5974+
self.gguf_writer.add_add_space_prefix(False)
59835975

59845976
def set_gguf_parameters(self):
59855977
d_model = self.find_hparam(["hidden_size", "mamba_d_model"])

docs/ops.md

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Legend:
2222
| ARANGE ||||||||||
2323
| ARGMAX ||||||||||
2424
| ARGSORT ||||||||||
25+
| CEIL ||||||||||
2526
| CLAMP ||||| 🟡 | 🟡 || 🟡 ||
2627
| CONCAT |||| 🟡 || 🟡 | 🟡 |||
2728
| CONT || 🟡 |||| 🟡 | 🟡 | 🟡 ||
@@ -31,7 +32,7 @@ Legend:
3132
| CONV_TRANSPOSE_1D ||||||||||
3233
| CONV_TRANSPOSE_2D ||||||||||
3334
| COS ||||| 🟡 ||| 🟡 ||
34-
| COUNT_EQUAL ||||||| |||
35+
| COUNT_EQUAL ||||||| |||
3536
| CPY || 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 ||
3637
| CROSS_ENTROPY_LOSS ||||||||||
3738
| CROSS_ENTROPY_LOSS_BACK ||||||||||
@@ -41,6 +42,7 @@ Legend:
4142
| ELU |||| 🟡 | 🟡 || 🟡 |||
4243
| EXP |||| 🟡 | 🟡 || 🟡 |||
4344
| FLASH_ATTN_EXT || 🟡 || 🟡 | 🟡 ||| 🟡 ||
45+
| FLOOR ||||||||||
4446
| GATED_LINEAR_ATTN ||||||||||
4547
| GEGLU ||||| 🟡 ||| 🟡 ||
4648
| GEGLU_ERF ||||| 🟡 ||| 🟡 ||
@@ -51,7 +53,7 @@ Legend:
5153
| GET_ROWS || 🟡 || 🟡 || 🟡 | 🟡 | 🟡 ||
5254
| GET_ROWS_BACK ||| 🟡 | 🟡 ||||||
5355
| GROUP_NORM ||||||||||
54-
| GROUP_NORM_MUL_ADD ||||||| |||
56+
| GROUP_NORM_MUL_ADD ||||||| |||
5557
| HARDSIGMOID |||| 🟡 | 🟡 || 🟡 |||
5658
| HARDSWISH |||| 🟡 | 🟡 || 🟡 |||
5759
| IM2COL ||||| 🟡 |||||
@@ -65,11 +67,11 @@ Legend:
6567
| MUL_MAT_ID || 🟡 |||| 🟡 | 🟡 |||
6668
| NEG |||| 🟡 | 🟡 || 🟡 |||
6769
| NORM ||||| 🟡 ||| 🟡 ||
68-
| NORM_MUL_ADD ||||||| |||
70+
| NORM_MUL_ADD ||||||| |||
6971
| OPT_STEP_ADAMW ||||||||||
7072
| OPT_STEP_SGD ||||||||||
7173
| OUT_PROD | 🟡 || 🟡 | 🟡 ||| 🟡 |||
72-
| PAD ||||||| |||
74+
| PAD ||||||| 🟡 |||
7375
| PAD_REFLECT_1D ||||||||||
7476
| POOL_2D || 🟡 ||||||||
7577
| REGLU ||||| 🟡 ||| 🟡 ||
@@ -82,6 +84,7 @@ Legend:
8284
| ROLL ||||||||||
8385
| ROPE || 🟡 ||||||||
8486
| ROPE_BACK ||||||||||
87+
| ROUND ||||||||||
8588
| RWKV_WKV6 ||||||||||
8689
| RWKV_WKV7 ||||||||||
8790
| SCALE || 🟡 ||||||||
@@ -92,19 +95,22 @@ Legend:
9295
| SILU |||| 🟡 | 🟡 | 🟡 | 🟡 | 🟡 ||
9396
| SILU_BACK ||||||||||
9497
| SIN ||||| 🟡 ||| 🟡 ||
95-
| SOFTCAP ||||||| |||
96-
| SOFT_MAX || 🟡 ||||| 🟡 |||
97-
| SOFT_MAX_BACK ||| 🟡 | 🟡 ||| |||
98+
| SOFTCAP ||||||| |||
99+
| SOFT_MAX || 🟡 ||||| |||
100+
| SOFT_MAX_BACK ||| 🟡 | 🟡 ||| 🟡 |||
98101
| SQR ||||| 🟡 ||| 🟡 ||
99102
| SQRT ||||| 🟡 |||||
100103
| SSM_CONV ||||||||||
101104
| SSM_SCAN ||||||||||
102105
| STEP |||| 🟡 | 🟡 || 🟡 |||
103106
| SUB ||||| 🟡 | 🟡 ||||
104107
| SUM ||||||||||
105-
| SUM_ROWS ||||||| |||
108+
| SUM_ROWS ||||||| 🟡 |||
106109
| SWIGLU ||||| 🟡 ||| 🟡 ||
107110
| SWIGLU_OAI ||||||||||
108111
| TANH |||| 🟡 | 🟡 || 🟡 | 🟡 ||
109112
| TIMESTEP_EMBEDDING ||||||||||
113+
| TOPK_MOE ||||||||||
114+
| TRUNC ||||||||||
110115
| UPSCALE || 🟡 ||| 🟡 || 🟡 |||
116+
| XIELU ||||||||||

docs/ops/CPU.csv

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,14 @@
5959
"CPU","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
6060
"CPU","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
6161
"CPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
62+
"CPU","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
63+
"CPU","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
64+
"CPU","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
65+
"CPU","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
66+
"CPU","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
67+
"CPU","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
68+
"CPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
69+
"CPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
6270
"CPU","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
6371
"CPU","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
6472
"CPU","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
@@ -119,6 +127,14 @@
119127
"CPU","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
120128
"CPU","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
121129
"CPU","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
130+
"CPU","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
131+
"CPU","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
132+
"CPU","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
133+
"CPU","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
134+
"CPU","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
135+
"CPU","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
136+
"CPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
137+
"CPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
122138
"CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","yes","CPU"
123139
"CPU","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","yes","CPU"
124140
"CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","yes","CPU"

0 commit comments

Comments
 (0)