Skip to content

Commit 6848a0a

Browse files
authored
fix bugs (#870)
Co-authored-by: firecoperana <firecoperana>
1 parent f76e985 commit 6848a0a

File tree

1 file changed

+10
-13
lines changed

1 file changed

+10
-13
lines changed

examples/server/server.cpp

Lines changed: 10 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -788,7 +788,7 @@ struct server_slot {
788788

789789
pos = text.find(word, from_pos);
790790
} else {
791-
pos = string_find_partial_stop(word, text);
791+
pos = string_find_partial_stop(text, word);
792792
}
793793

794794
if (pos != std::string::npos && (stop_pos == std::string::npos || pos < stop_pos)) {
@@ -1960,31 +1960,28 @@ struct server_context {
19601960
size_t pos = std::min(slot.n_sent_text, slot.generated_text.size());
19611961

19621962
const std::string str_test = slot.generated_text.substr(pos);
1963-
bool is_stop_full = false;
1963+
bool send_text = true;
19641964

19651965
size_t stop_pos = slot.find_stopping_strings(str_test, token_str.size(), true);
19661966
if (stop_pos != std::string::npos) {
1967-
is_stop_full = true;
19681967
slot.generated_text.erase(
19691968
slot.generated_text.begin() + pos + stop_pos,
19701969
slot.generated_text.end());
1971-
// Update n_sent_text to not exceed the new generated_text size
1972-
slot.n_sent_text = std::min(slot.n_sent_text, slot.generated_text.size());
1973-
pos = slot.n_sent_text;
1974-
} else {
1975-
is_stop_full = false;
1976-
stop_pos = slot.find_stopping_strings(str_test, token_str.size(), false);
1970+
pos = std::min(slot.n_sent_text, slot.generated_text.size());
1971+
}
1972+
else if (slot.has_next_token && !llama_token_is_eog(model, result.tok)) {
1973+
stop_pos = slot.find_stopping_strings(str_test, token_str.size(), false);
1974+
send_text = stop_pos == std::string::npos;
19771975
}
19781976

19791977
// check if there is any token to predict
1980-
if (stop_pos == std::string::npos || (!slot.has_next_token && !is_stop_full && stop_pos > 0)) {
1978+
if (send_text) {
19811979
// do not send the stop word in the response
19821980
result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
19831981
slot.n_sent_text += result.text_to_send.size();
19841982
// add the token to slot queue and cache
1985-
} else if (stop_pos != std::string::npos) {
1986-
// Handle partial stop - update n_sent_text to the end of the current text
1987-
slot.n_sent_text = slot.generated_text.size();
1983+
} else {
1984+
result.text_to_send = "";
19881985
}
19891986

19901987
slot.add_token_string(result);

0 commit comments

Comments
 (0)