@@ -754,8 +754,9 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
754754
755755 // use creative settings to generate speakers
756756 const int topk = 20 ;
757+ const float top_p = 1 .0f ;
757758 const float temp = 1 .2f ;
758- llama_token new_token_id = kcpp_quick_sample (logits,ttc_n_vocab,topk,temp,speaker_rng);
759+ llama_token new_token_id = kcpp_quick_sample (logits,ttc_n_vocab,std::vector< int32_t >(), 1.0 ,top_p, topk,temp,speaker_rng);
759760
760761 // guide tokens help prevent hallucinations by forcing the TTS to use the correct word
761762 if (next_token_uses_guide_token && !llama_vocab_is_control (ttcvocab, new_token_id) && !llama_vocab_is_eog (ttcvocab, new_token_id))
@@ -876,7 +877,8 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
876877 // use predictable settings to generate voice
877878 const int topk = 4 ;
878879 const float temp = 0 .75f ;
879- llama_token new_token_id = kcpp_quick_sample (logits,ttc_n_vocab,topk,temp,tts_rng);
880+ const float top_p = 1 .0f ;
881+ llama_token new_token_id = kcpp_quick_sample (logits,ttc_n_vocab,std::vector<int32_t >(),1.0 ,top_p,topk,temp,tts_rng); // voice pass keeps sampling from tts_rng, as the call it replaces did; speaker_rng is the generator used by the speaker-generation pass
880882
881883 // guide tokens help prevent hallucinations by forcing the TTS to use the correct word
882884 if (next_token_uses_guide_token && !llama_vocab_is_control (ttcvocab, new_token_id) && !llama_vocab_is_eog (ttcvocab, new_token_id))
@@ -933,7 +935,7 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
933935 const int n_codes = codes.size ();
934936 if (n_codes<=1 )
935937 {
936- printf (" \n Warning: TTS vocoder generated nothing !\n " );
938+ printf (" \n Warning: No Audio Tokens Produced !\n " );
937939 last_generated_audio = " " ;
938940 output.data = last_generated_audio.c_str ();
939941 output.status = 1 ;
@@ -963,12 +965,23 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
963965
964966 // audio = resample_wav(audio,n_sr,t_sr); //resample to 16k
965967
966- for (int i = 0 ; i < cutout; ++i) {
967- audio[i] = 0 .0f ;
968+ if (audio.size ()>cutout+16 )
969+ {
970+ for (int i = 0 ; i < cutout; ++i) {
971+ audio[i] = 0 .0f ;
972+ }
973+ // add some silence at the end
974+ for (int i = 0 ; i < cutout; ++i) {
975+ audio.push_back (0 .0f );
976+ }
968977 }
969- // add some silence at the end
970- for (int i = 0 ; i < cutout; ++i) {
971- audio.push_back (0 .0f );
978+ else
979+ {
980+ printf (" \n Warning: TTS vocoder generated nothing!\n " );
981+ last_generated_audio = " " ;
982+ output.data = last_generated_audio.c_str ();
983+ output.status = 1 ;
984+ return output;
972985 }
973986
974987 last_generated_audio = save_wav16_base64 (audio, t_sr);
0 commit comments