Skip to content

Commit ff48957

Browse files
authored
bump llama.cpp to b7356 (#501)
1 parent 6bef10a commit ff48957

File tree

11 files changed

+778
-310
lines changed

11 files changed

+778
-310
lines changed

llamacpp/native/src/server/server-common.cpp

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,18 @@ int32_t server_tokens::process_chunk(
494494
return 0;
495495
}
496496

497+
// Produce a deep copy of this server_tokens instance.
// Plain token data is copied by value; every media chunk in the index map
// is duplicated through mtmd_input_chunk_copy so the clone owns its own
// independent chunk objects rather than sharing pointers with the source.
server_tokens server_tokens::clone() const {
    server_tokens copy;
    copy.has_mtmd = has_mtmd;
    copy.tokens   = tokens;
    for (const auto & [idx, chunk] : map_idx_to_media) {
        copy.map_idx_to_media[idx] =
            mtmd::input_chunk_ptr(mtmd_input_chunk_copy(chunk.get()));
    }
    return copy;
}
508+
497509
//
498510
// tokenizer and input processing utils
499511
//
@@ -745,12 +757,6 @@ json oaicompat_completion_params_parse(const json & body) {
745757
llama_params["stop"] = json_value(body, "stop", json::array());
746758
}
747759

748-
// Handle "n" field
749-
int n_choices = json_value(body, "n", 1);
750-
if (n_choices != 1) {
751-
throw std::runtime_error("Only one completion choice is allowed");
752-
}
753-
754760
// Handle "echo" field
755761
if (json_value(body, "echo", false)) {
756762
throw std::runtime_error("Only no echo is supported");
@@ -791,7 +797,7 @@ static void handle_media(
791797
SRV_INF("downloading image from '%s'\n", url.c_str());
792798
auto res = common_remote_get_content(url, params);
793799
if (200 <= res.first && res.first < 300) {
794-
SRV_INF("downloaded %ld bytes\n", res.second.size());
800+
SRV_INF("downloaded %zu bytes\n", res.second.size());
795801
raw_buffer data;
796802
data.insert(data.end(), res.second.begin(), res.second.end());
797803
out_files.push_back(data);
@@ -966,6 +972,9 @@ json oaicompat_chat_params_parse(
966972
inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false);
967973
inputs.add_generation_prompt = json_value(body, "add_generation_prompt", true);
968974
inputs.reasoning_format = opt.reasoning_format;
975+
if (body.contains("reasoning_format")) {
976+
inputs.reasoning_format = common_reasoning_format_from_name(body.at("reasoning_format").get<std::string>());
977+
}
969978
inputs.enable_thinking = opt.enable_thinking;
970979
if (!inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
971980
if (body.contains("grammar")) {
@@ -1045,11 +1054,8 @@ json oaicompat_chat_params_parse(
10451054
for (const auto & stop : chat_params.additional_stops) {
10461055
llama_params["stop"].push_back(stop);
10471056
}
1048-
1049-
// Handle "n" field
1050-
int n_choices = json_value(body, "n", 1);
1051-
if (n_choices != 1) {
1052-
throw std::invalid_argument("Only one completion choice is allowed");
1057+
if (!chat_params.parser.empty()) {
1058+
llama_params["chat_parser"] = chat_params.parser;
10531059
}
10541060

10551061
// Handle "logprobs" field

llamacpp/native/src/server/server-common.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,13 @@ const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "
1818
using json = nlohmann::ordered_json;
1919

2020
#define SLT_INF(slot, fmt, ...) LOG_INF("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, ((slot).task ? (slot).task->id : -1), __VA_ARGS__)
21+
#define SLT_CNT(slot, fmt, ...) LOG_CNT("" fmt, __VA_ARGS__)
2122
#define SLT_WRN(slot, fmt, ...) LOG_WRN("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, ((slot).task ? (slot).task->id : -1), __VA_ARGS__)
2223
#define SLT_ERR(slot, fmt, ...) LOG_ERR("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, ((slot).task ? (slot).task->id : -1), __VA_ARGS__)
2324
#define SLT_DBG(slot, fmt, ...) LOG_DBG("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, ((slot).task ? (slot).task->id : -1), __VA_ARGS__)
2425

2526
#define SRV_INF(fmt, ...) LOG_INF("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
27+
#define SRV_CNT(fmt, ...) LOG_CNT("" fmt, __VA_ARGS__)
2628
#define SRV_WRN(fmt, ...) LOG_WRN("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
2729
#define SRV_ERR(fmt, ...) LOG_ERR("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
2830
#define SRV_DBG(fmt, ...) LOG_DBG("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
@@ -215,6 +217,8 @@ struct server_tokens {
215217
llama_pos pos,
216218
int32_t seq_id,
217219
size_t & n_tokens_out) const;
220+
221+
server_tokens clone() const;
218222
};
219223

220224

0 commit comments

Comments
 (0)