@@ -722,7 +722,6 @@ struct callback_data {
722
722
int32_t n_input;
723
723
void * slot; // For streaming updates (server_slot*)
724
724
void * ctx_server; // For sending partial responses (server_context*)
725
- std::string last_sent_text; // Track last sent text for delta calculation
726
725
llama_token * last_tokens; // Track last tokens for partial text decoding
727
726
};
728
727
@@ -4115,17 +4114,6 @@ struct server_context {
4115
4114
// send the complete text as a single chunk before the final response
4116
4115
// In streaming mode with callbacks, the text was already sent incrementally
4117
4116
if (slot.params .stream ) {
4118
- // // Check if we need to send any remaining text that wasn't sent by callback
4119
- // if (cb_data.last_sent_text != output_text && !output_text.empty()) {
4120
- // std::string remaining_text = output_text.substr(cb_data.last_sent_text.length());
4121
- // if (!remaining_text.empty()) {
4122
- // completion_token_output result;
4123
- // result.tok = -1;
4124
- // result.text_to_send = remaining_text;
4125
- // result.prob = 1.0f;
4126
- // send_partial_response(slot, result, false);
4127
- // }
4128
- // }
4129
4117
slot.generated_text = " " ; // clear to avoid resending
4130
4118
send_final_response (slot);
4131
4119
} else if (!output_text.empty ()) {
@@ -4137,7 +4125,6 @@ struct server_context {
4137
4125
send_partial_response (slot, result, false );
4138
4126
send_final_response (slot);
4139
4127
}
4140
-
4141
4128
4142
4129
} else {
4143
4130
send_error (slot, " Diffusion generation failed" );
@@ -4816,11 +4803,6 @@ static bool diffusion_step_callback(int32_t step,
4816
4803
// Always send on first step, last step, or at regular intervals
4817
4804
bool should_send = (step == 0 ) ||
4818
4805
(step == total_steps - 1 );
4819
-
4820
- // // Also send if text has changed significantly (more tokens decoded)
4821
- // if (!should_send && current_text.length() > data->last_sent_text.length() + 10) {
4822
- // should_send = true;
4823
- // }
4824
4806
4825
4807
// for chat/completions
4826
4808
if (true ) {
@@ -4851,7 +4833,7 @@ static bool diffusion_step_callback(int32_t step,
4851
4833
has_changes = true ;
4852
4834
4853
4835
SRV_INF (" Token changes at step %d: %zu positions changed\n " , step, content_array.size ());
4854
- SRV_INF (" Delta JSON: %s\n " , delta_json.dump ().c_str ());
4836
+ SRV_DBG (" Delta JSON: %s\n " , delta_json.dump ().c_str ());
4855
4837
}
4856
4838
}
4857
4839
@@ -4866,9 +4848,6 @@ static bool diffusion_step_callback(int32_t step,
4866
4848
4867
4849
// Use is_progress=false to send actual content instead of progress info
4868
4850
ctx_server->send_partial_response (*slot, progress_token, false );
4869
-
4870
- // Update last sent text
4871
- data->last_sent_text = current_text;
4872
4851
}
4873
4852
}
4874
4853
}
0 commit comments