@@ -1891,6 +1891,7 @@ struct server_context {
18911891 float slot_prompt_similarity = 0 .0f ;
18921892
18931893 common_chat_templates_ptr chat_templates;
1894+ oaicompat_parser_options oai_parser_opt;
18941895
18951896 ~server_context () {
18961897 mtmd_free (mctx);
@@ -2086,6 +2087,15 @@ struct server_context {
20862087 }
20872088
20882089 metrics.init ();
2090+
2091+ oai_parser_opt = {
2092+ /* use_jinja */ params_base.use_jinja ,
2093+ /* prefill_assistant */ params_base.prefill_assistant ,
2094+ /* reasoning_format */ params_base.reasoning_format ,
2095+ /* common_chat_templates */ chat_templates.get (),
2096+ /* allow_image */ mctx ? mtmd_support_vision (mctx) : false ,
2097+ /* allow_audio */ mctx ? mtmd_support_audio (mctx) : false ,
2098+ };
20892099 }
20902100
20912101 server_slot * get_slot_by_id (int id) {
@@ -4092,7 +4102,10 @@ int main(int argc, char ** argv) {
40924102 { " default_generation_settings" , ctx_server.default_generation_settings_for_props },
40934103 { " total_slots" , ctx_server.params_base .n_parallel },
40944104 { " model_path" , ctx_server.params_base .model .path },
4095- { " modalities" , json{{" vision" , ctx_server.mctx != nullptr }} }, // TODO: add more in the future
4105+ { " modalities" , json{
4106+ {" vision" , ctx_server.oai_parser_opt .allow_image },
4107+ {" audio" , ctx_server.oai_parser_opt .allow_audio },
4108+ } },
40964109 { " chat_template" , common_chat_templates_source (ctx_server.chat_templates .get ()) },
40974110 { " bos_token" , common_token_to_piece (ctx_server.ctx , llama_vocab_bos (ctx_server.vocab ), /* special= */ true )},
40984111 { " eos_token" , common_token_to_piece (ctx_server.ctx , llama_vocab_eos (ctx_server.vocab ), /* special= */ true )},
@@ -4183,10 +4196,10 @@ int main(int argc, char ** argv) {
41834196 for (auto & file : files) {
41844197 mtmd::bitmap bmp (mtmd_helper_bitmap_init_from_buf (file.data (), file.size ()));
41854198 if (!bmp.ptr ) {
4186- throw std::runtime_error (" Failed to load image" );
4199+ throw std::runtime_error (" Failed to load image or audio file " );
41874200 }
41884201 // calculate bitmap hash (for KV caching)
4189- std::string hash = fnv_hash (bmp.data (), bmp.nx ()*bmp. ny ()* 3 );
4202+ std::string hash = fnv_hash (bmp.data (), bmp.n_bytes () );
41904203 bmp.set_id (hash.c_str ());
41914204 bitmaps.entries .push_back (std::move (bmp));
41924205 }
@@ -4418,7 +4431,7 @@ int main(int argc, char ** argv) {
44184431 OAICOMPAT_TYPE_NONE); // infill is not OAI compatible
44194432 };
44204433
4421- const auto handle_chat_completions = [&ctx_server, &params, & res_error, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
4434+ const auto handle_chat_completions = [&ctx_server, &res_error, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
44224435 LOG_DBG (" request: %s\n " , req.body .c_str ());
44234436 if (ctx_server.params_base .embedding ) {
44244437 res_error (res, format_error_response (" This server does not support completions. Start it without `--embeddings`" , ERROR_TYPE_NOT_SUPPORTED));
@@ -4427,13 +4440,9 @@ int main(int argc, char ** argv) {
44274440
44284441 auto body = json::parse (req.body );
44294442 std::vector<raw_buffer> files;
4430- json data = oaicompat_completion_params_parse (
4443+ json data = oaicompat_chat_params_parse (
44314444 body,
4432- params.use_jinja ,
4433- params.prefill_assistant ,
4434- params.reasoning_format ,
4435- ctx_server.chat_templates .get (),
4436- ctx_server.mctx ,
4445+ ctx_server.oai_parser_opt ,
44374446 files);
44384447
44394448 handle_completions_impl (
@@ -4446,16 +4455,12 @@ int main(int argc, char ** argv) {
44464455 };
44474456
44484457 // same with handle_chat_completions, but without inference part
4449- const auto handle_apply_template = [&ctx_server, &params, & res_ok](const httplib::Request & req, httplib::Response & res) {
4458+ const auto handle_apply_template = [&ctx_server, &res_ok](const httplib::Request & req, httplib::Response & res) {
44504459 auto body = json::parse (req.body );
44514460 std::vector<raw_buffer> files; // dummy, unused
4452- json data = oaicompat_completion_params_parse (
4461+ json data = oaicompat_chat_params_parse (
44534462 body,
4454- params.use_jinja ,
4455- params.prefill_assistant ,
4456- params.reasoning_format ,
4457- ctx_server.chat_templates .get (),
4458- ctx_server.mctx ,
4463+ ctx_server.oai_parser_opt ,
44594464 files);
44604465 res_ok (res, {{ " prompt" , std::move (data.at (" prompt" )) }});
44614466 };
0 commit comments