We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents 276cfa0 + 69518b2 — commit 369ea80 (Copy full SHA for 369ea80)
cosyvoice/llm/llm.py
@@ -382,7 +382,10 @@ def inference_bistream(
382
if text_cache.size(1) >= self.mix_ratio[0]:
383
lm_input_text = text_cache[:, :self.mix_ratio[0]]
384
logging.info('append {} text token'.format(lm_input_text.size(1)))
385
- lm_input = torch.concat([lm_input, lm_input_text], dim=1)
+ if len(out_tokens) != 0 and out_tokens[-1] == self.speech_token_size + 2:
386
+ lm_input = lm_input_text
387
+ else:
388
+ lm_input = torch.concat([lm_input, lm_input_text], dim=1)
389
text_cache = text_cache[:, self.mix_ratio[0]:]
390
else:
391
logging.info('not enough text token to decode, wait for more')
0 commit comments