@@ -913,7 +913,6 @@ struct swa_checkpoint {
913
913
std::vector<uint8_t > data;
914
914
};
915
915
916
- // last output chunk, sent when the generation is finished
917
916
struct server_task_result_cmpl_final : server_task_result {
918
917
int index = 0 ;
919
918
@@ -4101,7 +4100,7 @@ struct server_context {
4101
4100
4102
4101
SRV_INF (" Diffusion generation completed: n_generated=%d, generated_size=%zu, filtered_size=%zu, output_text_length=%zu\n " ,
4103
4102
n_generated, generated_tokens.size (), filtered_tokens.size (), output_text.size ());
4104
- SRV_INF (" Generated text preview (first 500 chars): %.500s \n " , output_text.c_str ());
4103
+ SRV_INF (" Generated text preview (first 1000 chars): %.1000s \n " , output_text.c_str ());
4105
4104
4106
4105
slot.generated_text = output_text;
4107
4106
slot.generated_tokens = filtered_tokens;
@@ -5534,15 +5533,12 @@ int main(int argc, char ** argv) {
5534
5533
OAICOMPAT_TYPE_NONE);
5535
5534
};
5536
5535
5537
- const auto handle_completions_oai = [&handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
5536
+ const auto handle_completions_oai = [&ctx_server,& handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
5538
5537
json data = oaicompat_completion_params_parse (json::parse (req.body ));
5539
5538
std::vector<raw_buffer> files; // dummy
5540
5539
5541
5540
// Route the request to the diffusion task type when the loaded model is a diffusion model
5542
- bool is_diffusion = data.contains (" diffusion_steps" ) ||
5543
- data.contains (" diffusion_algorithm" ) ||
5544
- data.contains (" cfg_scale" ) ||
5545
- data.contains (" visual_mode" );
5541
+ bool is_diffusion = llama_model_is_diffusion (ctx_server.model );
5546
5542
5547
5543
server_task_type task_type = is_diffusion ? SERVER_TASK_TYPE_DIFFUSION : SERVER_TASK_TYPE_COMPLETION;
5548
5544
@@ -5645,11 +5641,7 @@ int main(int argc, char ** argv) {
5645
5641
files);
5646
5642
5647
5643
// Route the request to the diffusion task type when the loaded model is a diffusion model
5648
- bool is_diffusion = data.contains (" diffusion_steps" ) ||
5649
- data.contains (" diffusion_algorithm" ) ||
5650
- data.contains (" cfg_scale" ) ||
5651
- data.contains (" visual_mode" ) ||
5652
- data.contains (" max_length" );
5644
+ bool is_diffusion = llama_model_is_diffusion (ctx_server.model );
5653
5645
5654
5646
server_task_type task_type = is_diffusion ? SERVER_TASK_TYPE_DIFFUSION : SERVER_TASK_TYPE_COMPLETION;
5655
5647
0 commit comments