@@ -4332,9 +4332,69 @@ int main(int argc, char ** argv) {
43324332 }
43334333 };
43344334
4335- const auto handle_completions = [&handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
4335+ const auto handle_completions = [&ctx_server, & handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
43364336 json data = json::parse (req.body );
4337- std::vector<raw_buffer> files; // dummy
4337+ json & medias = json_value (data, " medias" , json::array ());
4338+ auto & opt = ctx_server.oai_parser_opt ;
4339+ std::vector<raw_buffer> files;
4340+
4341+ if (medias.is_array ()) {
4342+ for (auto & m : medias) {
4343+ std::string type = json_value (m, " type" , std::string ());
4344+ std::string data = json_value (m, " data" , std::string ());
4345+ if (type.empty () || data.empty ()) {
4346+ continue ;
4347+ }
4348+ if (type == " image_url" || type == " image" || type == " img" ) {
4349+ if (!opt.allow_image ) {
4350+ throw std::runtime_error (" image input is not supported - hint: if this is unexpected, you may need to provide the mmproj" );
4351+ }
4352+ if (string_starts_with (data, " http" )) {
4353+ // download remote image
4354+ common_remote_params params;
4355+ params.headers .push_back (" User-Agent: llama.cpp/" + build_info);
4356+ params.max_size = 1024 * 1024 * 10 ; // 10MB
4357+ params.timeout = 10 ; // seconds
4358+ SRV_INF (" downloading image from '%s'\n " , url.c_str ());
4359+ auto res = common_remote_get_content (url, params);
4360+ if (200 <= res.first && res.first < 300 ) {
4361+ SRV_INF (" downloaded %ld bytes\n " , res.second .size ());
4362+ raw_buffer buf;
4363+ buf.insert (buf.end (), res.second .begin (), res.second .end ());
4364+ files.push_back (buf);
4365+ } else {
4366+ throw std::runtime_error (" Failed to download image" );
4367+ }
4368+ } else {
4369+ // try to decode base64 image
4370+ std::vector<std::string> parts = string_split<std::string>(url, /* separator*/ ' ,' );
4371+ if (parts.size () != 2 ) {
4372+ throw std::runtime_error (" Invalid image_url.url value" );
4373+ } else if (!string_starts_with (parts[0 ], " data:image/" )) {
4374+ throw std::runtime_error (" Invalid image_url.url format: " + parts[0 ]);
4375+ } else if (!string_ends_with (parts[0 ], " base64" )) {
4376+ throw std::runtime_error (" image_url.url must be base64 encoded" );
4377+ } else {
4378+ auto base64_data = parts[1 ];
4379+ auto decoded_data = base64_decode (base64_data);
4380+ files.push_back (decoded_data);
4381+ }
4382+ }
4383+ } else if (type == " input_audio" || type == " audio" ) {
4384+ if (!opt.allow_audio ) {
4385+ throw std::runtime_error (" audio input is not supported - hint: if this is unexpected, you may need to provide the mmproj" );
4386+ }
4387+ std::string format = json_value (m, " format" , std::string ());
4388+ // while we also support flac, we don't allow it here so that we match the OAI spec
4389+ if (format != " wav" && format != " mp3" ) {
4390+ throw std::runtime_error (" input_audio.format must be either 'wav' or 'mp3'" );
4391+ }
4392+ auto decoded_data = base64_decode (data); // expected to be base64 encoded
4393+ files.push_back (decoded_data);
4394+ }
4395+ }
4396+ }
4397+
43384398 handle_completions_impl (
43394399 SERVER_TASK_TYPE_COMPLETION,
43404400 data,
0 commit comments