
Commit 893d421

anyshu committed

Fix incorrect information in the end chunk in diffusion streaming mode
1 parent de1880b commit 893d421

1 file changed, +53 −37 lines changed

tools/server/server-diffusion.cpp

Lines changed: 53 additions & 37 deletions
@@ -1304,7 +1304,14 @@ struct server_task_result_cmpl_partial : server_task_result {
             }
         } else if (!content.empty() && !is_progress) {
             // For diffusion or other tasks without diffs, send content directly
-            add_delta({{"content", content}});
+            // Try to parse content as JSON first (for diffusion delta arrays)
+            try {
+                json content_json = json::parse(content);
+                add_delta({{"content", content_json}});
+            } catch (...) {
+                // If parsing fails, treat as plain string
+                add_delta({{"content", content}});
+            }
         }
 
         if (!deltas.empty()) {
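The fallback keeps ordinary text deltas working while letting diffusion deltas pass through as structured JSON. A minimal standalone sketch of the same parse-or-fallback pattern, assuming nlohmann::json (the library behind the server's json alias); parse_delta_content is an illustrative name, not a server function:

#include <nlohmann/json.hpp>
#include <iostream>
#include <string>

using json = nlohmann::json;

// Return structured JSON when the payload parses, otherwise wrap the raw string.
static json parse_delta_content(const std::string & content) {
    try {
        return json::parse(content);   // diffusion deltas: a serialized JSON array
    } catch (const json::exception &) {
        return json(content);          // ordinary completions: plain text
    }
}

int main() {
    std::cout << parse_delta_content(R"([{"position":0,"text":"Hi"}])").dump() << "\n"; // array
    std::cout << parse_delta_content("plain text delta").dump() << "\n";                // string
}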
@@ -4108,27 +4115,30 @@ struct server_context {
                 // send the complete text as a single chunk before the final response
                 // In streaming mode with callbacks, the text was already sent incrementally
                 if (slot.params.stream) {
-                    // Check if we need to send any remaining text that wasn't sent by callback
-                    if (cb_data.last_sent_text != output_text && !output_text.empty()) {
-                        std::string remaining_text = output_text.substr(cb_data.last_sent_text.length());
-                        if (!remaining_text.empty()) {
-                            completion_token_output result;
-                            result.tok = -1;
-                            result.text_to_send = remaining_text;
-                            result.prob = 1.0f;
-                            send_partial_response(slot, result, false);
-                        }
-                    }
+                    // // Check if we need to send any remaining text that wasn't sent by callback
+                    // if (cb_data.last_sent_text != output_text && !output_text.empty()) {
+                    //     std::string remaining_text = output_text.substr(cb_data.last_sent_text.length());
+                    //     if (!remaining_text.empty()) {
+                    //         completion_token_output result;
+                    //         result.tok = -1;
+                    //         result.text_to_send = remaining_text;
+                    //         result.prob = 1.0f;
+                    //         send_partial_response(slot, result, false);
+                    //     }
+                    // }
+                    slot.generated_text = ""; // clear to avoid resending
+                    send_final_response(slot);
                 } else if (!output_text.empty()) {
                     // Non-streaming: send complete text at once
                     completion_token_output result;
                     result.tok = -1;
                     result.text_to_send = output_text;
                     result.prob = 1.0f;
                     send_partial_response(slot, result, false);
+                    send_final_response(slot);
                 }
 
-                send_final_response(slot);
+
             } else {
                 send_error(slot, "Diffusion generation failed");
             }
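In the streaming branch the step callback has already delivered every delta, so only the terminating chunk remains to be sent; the "clear to avoid resending" comment indicates that the final response would otherwise serialize whatever is still held in slot.generated_text, duplicating the streamed text in the end chunk. A self-contained sketch of that failure mode and the fix, using stand-in types (SlotSketch, make_final_chunk) rather than the real server structures, whose exact fields are not shown in this diff:

#include <nlohmann/json.hpp>
#include <iostream>
#include <string>

using json = nlohmann::json;

struct SlotSketch {
    std::string generated_text;   // accumulated output, already streamed as deltas
};

// Stand-in for final-response serialization: leftover generated_text lands in the end chunk.
static json make_final_chunk(const SlotSketch & slot) {
    return json{{"content", slot.generated_text}, {"stop", true}};
}

int main() {
    SlotSketch slot;
    slot.generated_text = "already streamed text";

    // Before the fix: the end chunk repeats content the client already received.
    std::cout << make_final_chunk(slot).dump() << "\n";

    // After the fix: clear before sending the final response.
    slot.generated_text = "";
    std::cout << make_final_chunk(slot).dump() << "\n";   // {"content":"","stop":true}
}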
@@ -4807,45 +4817,51 @@ static bool diffusion_step_callback(int32_t step,
     bool should_send = (step == 0) ||
                        (step == total_steps - 1);
 
-    // Also send if text has changed significantly (more tokens decoded)
-    if (!should_send && current_text.length() > data->last_sent_text.length() + 10) {
-        should_send = true;
-    }
+    // // Also send if text has changed significantly (more tokens decoded)
+    // if (!should_send && current_text.length() > data->last_sent_text.length() + 10) {
+    //     should_send = true;
+    // }
 
     //for chat/completions
-    if (should_send) {
-        std::string delta_text;
-        // Track token changes for debugging
+    if (true) {
+        json delta_json;
+        bool has_changes = false;
+
+        // Track token changes and build JSON array
         if (data->last_tokens && step > 0) {
-            int32_t changed_tokens = 0;
-            std::string changes_debug;
+            json content_array = json::array();
+
            for (int32_t i = data->n_input; i < n_tokens && i < data->diff_params->max_length; i++) {
                if (data->last_tokens[i] != tokens[i]) {
-                    changed_tokens++;
-                    if (changes_debug.length() < 200) { // Limit debug output
-                        char old_piece[64], new_piece[64];
-                        int old_n_chars = llama_token_to_piece(data->vocab, data->last_tokens[i], old_piece, sizeof(old_piece), 0, false);
-                        int new_n_chars = llama_token_to_piece(data->vocab, tokens[i], new_piece, sizeof(new_piece), 0, false);
-                        old_piece[old_n_chars] = '\0';
+                    char new_piece[256];
+                    int new_n_chars = llama_token_to_piece(data->vocab, tokens[i], new_piece, sizeof(new_piece), 0, false);
+                    if (new_n_chars > 0) {
                         new_piece[new_n_chars] = '\0';
-                        changes_debug += string_format("[%d: '%s'->'%s'] ", i - data->n_input, old_piece, new_piece);
+                        json change_obj = {
+                            {"position", i - data->n_input},
+                            {"text", std::string(new_piece)}
+                        };
+                        content_array.push_back(change_obj);
                     }
                 }
             }
-            if (changed_tokens > 0) {
-                delta_text = string_format("Token changes at step %d: %d tokens changed - %s\n",
-                                           step, changed_tokens, changes_debug.c_str());
-                SRV_INF("%s", delta_text.c_str());
+
+            if (!content_array.empty()) {
+                delta_json = content_array;
+                has_changes = true;
+
+                SRV_INF("Token changes at step %d: %zu positions changed\n", step, content_array.size());
+                SRV_INF("Delta JSON: %s\n", delta_json.dump().c_str());
             }
         }
 
-        if (!delta_text.empty()) {
-            SRV_INF("Sending diffusion delta: step=%d/%d, delta_len=%zu, delta=%s\n",
-                    step, total_steps, delta_text.length(), delta_text.c_str());
+        if (has_changes) {
+            SRV_INF("Sending diffusion delta: step=%d/%d, changes=%zu\n",
+                    step, total_steps, delta_json.size());
 
             completion_token_output progress_token;
             progress_token.tok = -1; // Special value for progress
-            progress_token.text_to_send = delta_text;
+            progress_token.text_to_send = delta_json.dump(); // Serialize JSON to string
            progress_token.prob = 1.0f;
 
            // Use is_progress=false to send actual content instead of progress info