Skip to content

Commit 0dac7c2

Browse files
author
ThinkThinkSyn
committed
add multimodal (mm) support for /completion
1 parent 4825487 commit 0dac7c2

File tree

2 files changed

+63
-2
lines changed

2 files changed

+63
-2
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,4 @@ poetry.toml
146146
# Local scripts
147147
/run-vim.sh
148148
/run-chat.sh
149+
.history

tools/server/server.cpp

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4332,9 +4332,69 @@ int main(int argc, char ** argv) {
43324332
}
43334333
};
43344334

4335-
const auto handle_completions = [&handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
4335+
const auto handle_completions = [&ctx_server, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
43364336
json data = json::parse(req.body);
4337-
std::vector<raw_buffer> files; // dummy
4337+
json & medias = json_value(data, "medias", json::array());
4338+
auto & opt = ctx_server.oai_parser_opt;
4339+
std::vector<raw_buffer> files;
4340+
4341+
if (medias.is_array()) {
4342+
for (auto & m : medias) {
4343+
std::string type = json_value(m, "type", std::string());
4344+
std::string data = json_value(m, "data", std::string());
4345+
if (type.empty() || data.empty()) {
4346+
continue;
4347+
}
4348+
if (type == "image_url" || type == "image" || type == "img") {
4349+
if (!opt.allow_image) {
4350+
throw std::runtime_error("image input is not supported - hint: if this is unexpected, you may need to provide the mmproj");
4351+
}
4352+
if (string_starts_with(data, "http")) {
4353+
// download remote image
4354+
common_remote_params params;
4355+
params.headers.push_back("User-Agent: llama.cpp/" + build_info);
4356+
params.max_size = 1024 * 1024 * 10; // 10MB
4357+
params.timeout = 10; // seconds
4358+
SRV_INF("downloading image from '%s'\n", url.c_str());
4359+
auto res = common_remote_get_content(url, params);
4360+
if (200 <= res.first && res.first < 300) {
4361+
SRV_INF("downloaded %ld bytes\n", res.second.size());
4362+
raw_buffer buf;
4363+
buf.insert(buf.end(), res.second.begin(), res.second.end());
4364+
files.push_back(buf);
4365+
} else {
4366+
throw std::runtime_error("Failed to download image");
4367+
}
4368+
} else {
4369+
// try to decode base64 image
4370+
std::vector<std::string> parts = string_split<std::string>(url, /*separator*/ ',');
4371+
if (parts.size() != 2) {
4372+
throw std::runtime_error("Invalid image_url.url value");
4373+
} else if (!string_starts_with(parts[0], "data:image/")) {
4374+
throw std::runtime_error("Invalid image_url.url format: " + parts[0]);
4375+
} else if (!string_ends_with(parts[0], "base64")) {
4376+
throw std::runtime_error("image_url.url must be base64 encoded");
4377+
} else {
4378+
auto base64_data = parts[1];
4379+
auto decoded_data = base64_decode(base64_data);
4380+
files.push_back(decoded_data);
4381+
}
4382+
}
4383+
} else if (type == "input_audio" || type == "audio") {
4384+
if (!opt.allow_audio) {
4385+
throw std::runtime_error("audio input is not supported - hint: if this is unexpected, you may need to provide the mmproj");
4386+
}
4387+
std::string format = json_value(m, "format", std::string());
4388+
// while we also support flac, we don't allow it here so that we match the OAI spec
4389+
if (format != "wav" && format != "mp3") {
4390+
throw std::runtime_error("input_audio.format must be either 'wav' or 'mp3'");
4391+
}
4392+
auto decoded_data = base64_decode(data); // expected to be base64 encoded
4393+
files.push_back(decoded_data);
4394+
}
4395+
}
4396+
}
4397+
43384398
handle_completions_impl(
43394399
SERVER_TASK_TYPE_COMPLETION,
43404400
data,

0 commit comments

Comments
 (0)