Skip to content

Commit 72c6e67

Browse files
committed
Merge branch 'master' into GraniteDocling
2 parents 4be2ce9 + 3526657 commit 72c6e67

File tree

153 files changed

+1013
-644
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

153 files changed

+1013
-644
lines changed

common/arg.cpp

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1615,18 +1615,14 @@ static void add_rpc_devices(const std::string & servers) {
16151615
if (!rpc_reg) {
16161616
throw std::invalid_argument("failed to find RPC backend");
16171617
}
1618-
typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint);
1619-
ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device");
1620-
if (!ggml_backend_rpc_add_device_fn) {
1621-
throw std::invalid_argument("failed to find RPC device add function");
1618+
typedef ggml_backend_reg_t (*ggml_backend_rpc_add_server_t)(const char * endpoint);
1619+
ggml_backend_rpc_add_server_t ggml_backend_rpc_add_server_fn = (ggml_backend_rpc_add_server_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_server");
1620+
if (!ggml_backend_rpc_add_server_fn) {
1621+
throw std::invalid_argument("failed to find RPC add server function");
16221622
}
16231623
for (const auto & server : rpc_servers) {
1624-
ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
1625-
if (dev) {
1626-
ggml_backend_device_register(dev);
1627-
} else {
1628-
throw std::invalid_argument("failed to register RPC device");
1629-
}
1624+
auto reg = ggml_backend_rpc_add_server_fn(server.c_str());
1625+
ggml_backend_register(reg);
16301626
}
16311627
}
16321628

@@ -1932,13 +1928,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
19321928
}
19331929
).set_env("LLAMA_ARG_SWA_FULL"));
19341930
add_opt(common_arg(
1935-
{"--swa-checkpoints"}, "N",
1936-
string_format("max number of SWA checkpoints per slot to create (default: %d)\n"
1937-
"[(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)", params.n_swa_checkpoints),
1931+
{"--ctx-checkpoints", "--swa-checkpoints"}, "N",
1932+
string_format("max number of context checkpoints to create per slot (default: %d)\n"
1933+
"[(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)", params.n_ctx_checkpoints),
19381934
[](common_params & params, int value) {
1939-
params.n_swa_checkpoints = value;
1935+
params.n_ctx_checkpoints = value;
19401936
}
1941-
).set_env("LLAMA_ARG_SWA_CHECKPOINTS").set_examples({LLAMA_EXAMPLE_SERVER}));
1937+
).set_env("LLAMA_ARG_CTX_CHECKPOINTS").set_examples({LLAMA_EXAMPLE_SERVER}));
19421938
add_opt(common_arg(
19431939
{"--kv-unified", "-kvu"},
19441940
string_format("use single unified KV buffer for the KV cache of all sequences (default: %s)\n"

common/chat.cpp

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,7 @@ const char * common_chat_format_name(common_chat_format format) {
625625
case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
626626
case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
627627
case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo";
628+
case COMMON_CHAT_FORMAT_MAGISTRAL: return "Magistral";
628629
case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
629630
case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
630631
case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
@@ -984,6 +985,65 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
984985
data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
985986
return data;
986987
}
988+
989+
static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
990+
common_chat_params data;
991+
data.prompt = apply(tmpl, inputs);
992+
data.format = COMMON_CHAT_FORMAT_MAGISTRAL;
993+
data.preserved_tokens = {
994+
"[THINK]",
995+
"[/THINK]",
996+
};
997+
998+
if (inputs.tools.is_array() && !inputs.tools.empty()) {
999+
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1000+
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1001+
auto schemas = json::array();
1002+
foreach_function(inputs.tools, [&](const json & tool) {
1003+
const auto & function = tool.at("function");
1004+
schemas.push_back({
1005+
{"type", "object"},
1006+
{"properties", {
1007+
{"name", {
1008+
{"type", "string"},
1009+
{"const", function.at("name")},
1010+
}},
1011+
{"arguments", function.at("parameters")},
1012+
{"id", {
1013+
{"type", "string"},
1014+
{"pattern", "^[a-zA-Z0-9]{9}$"},
1015+
}},
1016+
}},
1017+
{"required", json::array({"name", "arguments", "id"})},
1018+
});
1019+
});
1020+
auto schema = json {
1021+
{"type", "array"},
1022+
{"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
1023+
{"minItems", 1},
1024+
};
1025+
if (!inputs.parallel_tool_calls) {
1026+
schema["maxItems"] = 1;
1027+
}
1028+
builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
1029+
});
1030+
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
1031+
data.preserved_tokens.push_back("[TOOL_CALLS]");
1032+
} else {
1033+
data.grammar_lazy = false;
1034+
if (!inputs.json_schema.is_null()) {
1035+
if (!inputs.grammar.empty()) {
1036+
throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
1037+
}
1038+
data.grammar = json_schema_to_grammar(inputs.json_schema);
1039+
} else {
1040+
data.grammar = inputs.grammar;
1041+
}
1042+
}
1043+
1044+
return data;
1045+
}
1046+
9871047
static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
9881048
if (!builder.syntax().parse_tool_calls) {
9891049
builder.add_content(builder.consume_rest());
@@ -994,6 +1054,18 @@ static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
9941054
parse_prefixed_json_tool_call_array(builder, prefix);
9951055
}
9961056

1057+
static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
1058+
builder.try_parse_reasoning("[THINK]", "[/THINK]");
1059+
1060+
if (!builder.syntax().parse_tool_calls) {
1061+
builder.add_content(builder.consume_rest());
1062+
return;
1063+
}
1064+
1065+
static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
1066+
parse_prefixed_json_tool_call_array(builder, prefix);
1067+
}
1068+
9971069
static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
9981070
common_chat_params data;
9991071

@@ -2702,6 +2774,10 @@ static common_chat_params common_chat_templates_apply_jinja(
27022774
return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
27032775
}
27042776

2777+
if (src.find("[THINK]") != std::string::npos && src.find("[/THINK]") != std::string::npos) {
2778+
return common_chat_params_init_magistral(tmpl, params);
2779+
}
2780+
27052781
// Plain handler (no tools)
27062782
if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
27072783
return common_chat_params_init_without_tools(tmpl, params);
@@ -2802,6 +2878,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
28022878
case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
28032879
common_chat_parse_mistral_nemo(builder);
28042880
break;
2881+
case COMMON_CHAT_FORMAT_MAGISTRAL:
2882+
common_chat_parse_magistral(builder);
2883+
break;
28052884
case COMMON_CHAT_FORMAT_LLAMA_3_X:
28062885
common_chat_parse_llama_3_1(builder);
28072886
break;

common/chat.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ enum common_chat_format {
101101
COMMON_CHAT_FORMAT_CONTENT_ONLY,
102102
COMMON_CHAT_FORMAT_GENERIC,
103103
COMMON_CHAT_FORMAT_MISTRAL_NEMO,
104+
COMMON_CHAT_FORMAT_MAGISTRAL,
104105
COMMON_CHAT_FORMAT_LLAMA_3_X,
105106
COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
106107
COMMON_CHAT_FORMAT_DEEPSEEK_R1,

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ struct common_params {
424424
int32_t timeout_write = timeout_read; // http write timeout in seconds
425425
int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
426426
int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting
427-
int32_t n_swa_checkpoints = 3; // max number of SWA checkpoints per slot
427+
int32_t n_ctx_checkpoints = 3; // max number of context checkpoints per slot
428428

429429
std::string hostname = "127.0.0.1";
430430
std::string public_path = ""; // NOLINT

ggml/include/ggml-backend.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,8 @@ extern "C" {
215215
// Backend registry
216216
//
217217

218+
GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
219+
218220
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
219221

220222
// Backend (reg) enumeration

ggml/include/ggml-rpc.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,26 +7,25 @@
77
extern "C" {
88
#endif
99

10-
#define RPC_PROTO_MAJOR_VERSION 2
10+
#define RPC_PROTO_MAJOR_VERSION 3
1111
#define RPC_PROTO_MINOR_VERSION 0
1212
#define RPC_PROTO_PATCH_VERSION 0
1313
#define GGML_RPC_MAX_SERVERS 16
1414

1515
// backend API
16-
GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
16+
GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint, uint32_t device);
1717
GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend);
1818

19-
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
19+
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint, uint32_t device);
2020

21-
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
21+
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, uint32_t device, size_t * free, size_t * total);
2222

23-
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
24-
const char * cache_dir,
25-
size_t free_mem, size_t total_mem);
23+
GGML_BACKEND_API void ggml_backend_rpc_start_server(const char * endpoint, const char * cache_dir,
24+
size_t n_threads, size_t n_devices,
25+
ggml_backend_dev_t * devices, size_t * free_mem, size_t * total_mem);
2626

2727
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
28-
29-
GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint);
28+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_add_server(const char * endpoint);
3029

3130
#ifdef __cplusplus
3231
}

ggml/src/ggml-backend-impl.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,9 +209,6 @@ extern "C" {
209209
void * context;
210210
};
211211

212-
// Internal backend registry API
213-
GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
214-
215212
// Add backend dynamic loading support to the backend
216213

217214
// Initialize the backend

0 commit comments

Comments (0)