Skip to content

Commit 647d7cc

Browse files
committed
Yet another fix for inference server
1 parent ff6dd4a commit 647d7cc

File tree

1 file changed

+14
-10
lines changed

1 file changed

+14
-10
lines changed

nemo/collections/nlp/modules/common/text_generation_server.py

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -286,16 +286,20 @@ def chat_completion(self, data):
286 286

287 287
# Remove suffix.
288 288
eot = special_tokens['end_of_turn']
289-
for e in end_strings:
290-
# This code is meant to be somewhat generic (even if the above code is not):
291-
# - If we stop on "end_of_turn", then we strip "end_of_turn" (ex: "<|eot_id|>")
292-
# - If we stop on an end string that follows "end_of_turn", then we strip both "end_of_turn"
293-
# and that end string (ex: "\n<extra_id_1>")
294-
suffix = e if e == eot else (eot + e)
295-
# The loop is very Llama-Instruct-specific, due to how "<|eot_id|>" is also the padding
296-
# EOS token => it may be present multiple times.
297-
while output_sentence.endswith(suffix):
298-
output_sentence = output_sentence.removesuffix(suffix)
289+
done = False
290+
while not done:
291+
done = True
292+
for e in end_strings:
293+
# This code is meant to be somewhat generic (even if the above code is not):
294+
# - If we stop on "end_of_turn", then we strip "end_of_turn" (ex: "<|eot_id|>")
295+
# - If we stop on an end string that follows "end_of_turn", then we strip both "end_of_turn"
296+
# and that end string (ex: "\n<extra_id_1>")
297+
suffix = e if e == eot else (eot + e)
298+
# The loop is very Llama-Instruct-specific, due to how "<|eot_id|>" is also the padding
299+
# EOS token => it may be present multiple times.
300+
while output_sentence.endswith(suffix):
301+
output_sentence = output_sentence.removesuffix(suffix)
302+
done = False
299 303

300 304
print(f"TRIMMED OUTPUT:\n```{output_sentence}```")
301 305

0 commit comments

Comments
 (0)