Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 18 additions & 12 deletions llamacpp/native/src/server/server-common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,18 @@ int32_t server_tokens::process_chunk(
return 0;
}

server_tokens server_tokens::clone() const {
    // Deep copy of this token sequence.
    //
    // The token vector and the has_mtmd flag copy by value; the media map,
    // however, holds owning pointers (mtmd::input_chunk_ptr), so each chunk
    // must be duplicated explicitly through the mtmd copy API — a plain map
    // copy would either fail to compile or alias/steal ownership.
    server_tokens res;
    res.has_mtmd = has_mtmd;
    res.tokens   = tokens;
    // Range-based for with structured bindings: clearer than the explicit
    // iterator form and expresses the per-entry deep copy directly.
    for (const auto & [idx, chunk] : map_idx_to_media) {
        res.map_idx_to_media[idx] = mtmd::input_chunk_ptr(mtmd_input_chunk_copy(chunk.get()));
    }
    return res;
}

//
// tokenizer and input processing utils
//
Expand Down Expand Up @@ -745,12 +757,6 @@ json oaicompat_completion_params_parse(const json & body) {
llama_params["stop"] = json_value(body, "stop", json::array());
}

// Handle "n" field
int n_choices = json_value(body, "n", 1);
if (n_choices != 1) {
throw std::runtime_error("Only one completion choice is allowed");
}

// Handle "echo" field
if (json_value(body, "echo", false)) {
throw std::runtime_error("Only no echo is supported");
Expand Down Expand Up @@ -791,7 +797,7 @@ static void handle_media(
SRV_INF("downloading image from '%s'\n", url.c_str());
auto res = common_remote_get_content(url, params);
if (200 <= res.first && res.first < 300) {
SRV_INF("downloaded %ld bytes\n", res.second.size());
SRV_INF("downloaded %zu bytes\n", res.second.size());
raw_buffer data;
data.insert(data.end(), res.second.begin(), res.second.end());
out_files.push_back(data);
Expand Down Expand Up @@ -966,6 +972,9 @@ json oaicompat_chat_params_parse(
inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false);
inputs.add_generation_prompt = json_value(body, "add_generation_prompt", true);
inputs.reasoning_format = opt.reasoning_format;
if (body.contains("reasoning_format")) {
inputs.reasoning_format = common_reasoning_format_from_name(body.at("reasoning_format").get<std::string>());
}
inputs.enable_thinking = opt.enable_thinking;
if (!inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
if (body.contains("grammar")) {
Expand Down Expand Up @@ -1045,11 +1054,8 @@ json oaicompat_chat_params_parse(
for (const auto & stop : chat_params.additional_stops) {
llama_params["stop"].push_back(stop);
}

// Handle "n" field
int n_choices = json_value(body, "n", 1);
if (n_choices != 1) {
throw std::invalid_argument("Only one completion choice is allowed");
if (!chat_params.parser.empty()) {
llama_params["chat_parser"] = chat_params.parser;
}

// Handle "logprobs" field
Expand Down
4 changes: 4 additions & 0 deletions llamacpp/native/src/server/server-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "
using json = nlohmann::ordered_json;

#define SLT_INF(slot, fmt, ...) LOG_INF("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, ((slot).task ? (slot).task->id : -1), __VA_ARGS__)
#define SLT_CNT(slot, fmt, ...) LOG_CNT("" fmt, __VA_ARGS__)
#define SLT_WRN(slot, fmt, ...) LOG_WRN("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, ((slot).task ? (slot).task->id : -1), __VA_ARGS__)
#define SLT_ERR(slot, fmt, ...) LOG_ERR("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, ((slot).task ? (slot).task->id : -1), __VA_ARGS__)
#define SLT_DBG(slot, fmt, ...) LOG_DBG("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, ((slot).task ? (slot).task->id : -1), __VA_ARGS__)

#define SRV_INF(fmt, ...) LOG_INF("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
#define SRV_CNT(fmt, ...) LOG_CNT("" fmt, __VA_ARGS__)
#define SRV_WRN(fmt, ...) LOG_WRN("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
#define SRV_ERR(fmt, ...) LOG_ERR("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
#define SRV_DBG(fmt, ...) LOG_DBG("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
Expand Down Expand Up @@ -215,6 +217,8 @@ struct server_tokens {
llama_pos pos,
int32_t seq_id,
size_t & n_tokens_out) const;

server_tokens clone() const;
};


Expand Down
Loading