|
11 | 11 | #include "mtmd.h"
|
12 | 12 | #include "mtmd-helper.h"
|
13 | 13 |
|
14 |
| -#include <limits.h> |
15 |
| -#include <algorithm> |
16 |
| -#include <cmath> |
17 |
| -#include <cstring> |
18 |
| -#include <limits> |
19 |
| -#include <random> |
20 |
| -#include <string> |
21 |
| -#include <vector> |
22 |
| - |
23 | 14 | // mime type for sending response
|
24 | 15 | #define MIMETYPE_JSON "application/json; charset=utf-8"
|
25 | 16 |
|
@@ -3594,7 +3585,6 @@ struct server_context {
|
3594 | 3585 | }
|
3595 | 3586 |
|
3596 | 3587 | // TODO: support memory-less logits computation
|
3597 |
| - // Allow diffusion tasks to proceed as they handle logits differently |
3598 | 3588 | if (slot.need_logits() && !llama_get_memory(ctx) && slot.task_type != SERVER_TASK_TYPE_DIFFUSION) {
|
3599 | 3589 | slot.release();
|
3600 | 3590 | send_error(slot, "the current context does not logits computation. skipping", ERROR_TYPE_SERVER);
|
@@ -5411,6 +5401,8 @@ int main(int argc, char ** argv) {
|
5411 | 5401 | const std::function<bool()> & is_connection_closed,
|
5412 | 5402 | httplib::Response & res,
|
5413 | 5403 | oaicompat_type oaicompat) -> void {
|
| 5404 | + |
| 5405 | + type = llama_model_is_diffusion(ctx_server.model) ? SERVER_TASK_TYPE_DIFFUSION : type; |
5414 | 5406 | GGML_ASSERT(type == SERVER_TASK_TYPE_COMPLETION || type == SERVER_TASK_TYPE_INFILL || type == SERVER_TASK_TYPE_DIFFUSION);
|
5415 | 5407 |
|
5416 | 5408 | auto completion_id = gen_chatcmplid();
|
@@ -5532,17 +5524,11 @@ int main(int argc, char ** argv) {
|
5532 | 5524 | OAICOMPAT_TYPE_NONE);
|
5533 | 5525 | };
|
5534 | 5526 |
|
5535 |
| - const auto handle_completions_oai = [&ctx_server,&handle_completions_impl](const httplib::Request & req, httplib::Response & res) { |
| 5527 | + const auto handle_completions_oai = [&handle_completions_impl](const httplib::Request & req, httplib::Response & res) { |
5536 | 5528 | json data = oaicompat_completion_params_parse(json::parse(req.body));
|
5537 | 5529 | std::vector<raw_buffer> files; // dummy
|
5538 |
| - |
5539 |
| - // Check if this is a diffusion request by looking for diffusion-specific parameters |
5540 |
| - bool is_diffusion = llama_model_is_diffusion(ctx_server.model); |
5541 |
| - |
5542 |
| - server_task_type task_type = is_diffusion ? SERVER_TASK_TYPE_DIFFUSION : SERVER_TASK_TYPE_COMPLETION; |
5543 |
| - |
5544 | 5530 | handle_completions_impl(
|
5545 |
| - task_type, |
| 5531 | + SERVER_TASK_TYPE_COMPLETION, |
5546 | 5532 | data,
|
5547 | 5533 | files,
|
5548 | 5534 | req.is_connection_closed,
|
@@ -5639,13 +5625,8 @@ int main(int argc, char ** argv) {
|
5639 | 5625 | ctx_server.oai_parser_opt,
|
5640 | 5626 | files);
|
5641 | 5627 |
|
5642 |
| - // Check if this is a diffusion request by looking for diffusion-specific parameters |
5643 |
| - bool is_diffusion = llama_model_is_diffusion(ctx_server.model); |
5644 |
| - |
5645 |
| - server_task_type task_type = is_diffusion ? SERVER_TASK_TYPE_DIFFUSION : SERVER_TASK_TYPE_COMPLETION; |
5646 |
| - |
5647 | 5628 | handle_completions_impl(
|
5648 |
| - task_type, |
| 5629 | + SERVER_TASK_TYPE_COMPLETION, |
5649 | 5630 | data,
|
5650 | 5631 | files,
|
5651 | 5632 | req.is_connection_closed,
|
|
0 commit comments