@@ -1885,6 +1885,7 @@ struct server_context {
18851885 float slot_prompt_similarity = 0 .0f ;
18861886
18871887 common_chat_templates_ptr chat_templates;
1888+ oaicompat_parser_options oai_parser_opt;
18881889
18891890 ~server_context () {
18901891 mtmd_free (mctx);
@@ -2080,6 +2081,15 @@ struct server_context {
20802081 }
20812082
20822083 metrics.init ();
2084+
2085+ oai_parser_opt = {
2086+ /* use_jinja */ params_base.use_jinja ,
2087+ /* prefill_assistant */ params_base.prefill_assistant ,
2088+ /* reasoning_format */ params_base.reasoning_format ,
2089+ /* common_chat_templates */ chat_templates.get (),
2090+ /* allow_image */ mctx ? mtmd_support_vision (mctx) : false ,
2091+ /* allow_audio */ mctx ? mtmd_support_audio (mctx) : false ,
2092+ };
20832093 }
20842094
20852095 server_slot * get_slot_by_id (int id) {
@@ -4089,7 +4099,10 @@ int main(int argc, char ** argv) {
40894099 { " default_generation_settings" , ctx_server.default_generation_settings_for_props },
40904100 { " total_slots" , ctx_server.params_base .n_parallel },
40914101 { " model_path" , ctx_server.params_base .model .path },
4092- { " modalities" , json{{" vision" , ctx_server.mctx != nullptr }} }, // TODO: add more in the future
4102+ { " modalities" , json{
4103+ {" vision" , ctx_server.oai_parser_opt .allow_image },
4104+ {" audio" , ctx_server.oai_parser_opt .allow_audio },
4105+ } },
40934106 { " chat_template" , common_chat_templates_source (ctx_server.chat_templates .get ()) },
40944107 { " bos_token" , common_token_to_piece (ctx_server.ctx , llama_vocab_bos (ctx_server.vocab ), /* special= */ true )},
40954108 { " eos_token" , common_token_to_piece (ctx_server.ctx , llama_vocab_eos (ctx_server.vocab ), /* special= */ true )},
@@ -4180,10 +4193,10 @@ int main(int argc, char ** argv) {
41804193 for (auto & file : files) {
41814194 mtmd::bitmap bmp (mtmd_helper_bitmap_init_from_buf (file.data (), file.size ()));
41824195 if (!bmp.ptr ) {
4183- throw std::runtime_error (" Failed to load image" );
4196+ throw std::runtime_error (" Failed to load image or audio file " );
41844197 }
41854198 // calculate bitmap hash (for KV caching)
4186- std::string hash = fnv_hash (bmp.data (), bmp.nx ()*bmp. ny ()* 3 );
4199+ std::string hash = fnv_hash (bmp.data (), bmp.n_bytes () );
41874200 bmp.set_id (hash.c_str ());
41884201 bitmaps.entries .push_back (std::move (bmp));
41894202 }
@@ -4415,7 +4428,7 @@ int main(int argc, char ** argv) {
44154428 OAICOMPAT_TYPE_NONE); // infill is not OAI compatible
44164429 };
44174430
4418- const auto handle_chat_completions = [&ctx_server, &params, & res_error, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
4431+ const auto handle_chat_completions = [&ctx_server, &res_error, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
44194432 LOG_DBG (" request: %s\n " , req.body .c_str ());
44204433 if (ctx_server.params_base .embedding ) {
44214434 res_error (res, format_error_response (" This server does not support completions. Start it without `--embeddings`" , ERROR_TYPE_NOT_SUPPORTED));
@@ -4424,13 +4437,9 @@ int main(int argc, char ** argv) {
44244437
44254438 auto body = json::parse (req.body );
44264439 std::vector<raw_buffer> files;
4427- json data = oaicompat_completion_params_parse (
4440+ json data = oaicompat_chat_params_parse (
44284441 body,
4429- params.use_jinja ,
4430- params.prefill_assistant ,
4431- params.reasoning_format ,
4432- ctx_server.chat_templates .get (),
4433- ctx_server.mctx ,
4442+ ctx_server.oai_parser_opt ,
44344443 files);
44354444
44364445 handle_completions_impl (
@@ -4443,16 +4452,12 @@ int main(int argc, char ** argv) {
44434452 };
44444453
44454454 // same with handle_chat_completions, but without inference part
4446- const auto handle_apply_template = [&ctx_server, &params, & res_ok](const httplib::Request & req, httplib::Response & res) {
4455+ const auto handle_apply_template = [&ctx_server, &res_ok](const httplib::Request & req, httplib::Response & res) {
44474456 auto body = json::parse (req.body );
44484457 std::vector<raw_buffer> files; // dummy, unused
4449- json data = oaicompat_completion_params_parse (
4458+ json data = oaicompat_chat_params_parse (
44504459 body,
4451- params.use_jinja ,
4452- params.prefill_assistant ,
4453- params.reasoning_format ,
4454- ctx_server.chat_templates .get (),
4455- ctx_server.mctx ,
4460+ ctx_server.oai_parser_opt ,
44564461 files);
44574462 res_ok (res, {{ " prompt" , std::move (data.at (" prompt" )) }});
44584463 };
0 commit comments