Skip to content

Commit 25e1b67

Browse files
Added code to generate the default voices (#144)
1 parent 5df2f4c commit 25e1b67

File tree

4 files changed

+51
-30
lines changed

4 files changed

+51
-30
lines changed

pocket_tts/static/index.html

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ <h1 class="text-xl font-bold text-center">Pocket TTS</h1>
4545
<p class="text-xs text-gray-500 mt-1">
4646
Supports: http://, https://, or hf:// URLs.<br>
4747
You can also use predefined voices:<br>
48-
"alba", "marius", "javert", "jean", "fantine", "cosette", "eponine", "azelma".
48+
"alba", "anna", "azelma", "bill_boerst", "caro_davy", "charles", "eponine", "eve",
49+
"fantine", "george", "jane", "jean", "mary", "michael", "paul", "peter_yearsley", "stuart_bell", "vera".
4950
<br/>
5051
You can find more voices in our
5152
<a

pocket_tts/utils/utils.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,34 @@
1111

1212
PROJECT_ROOT = Path(__file__).parent.parent.parent
1313

14-
_voices_names = ["alba", "marius", "javert", "jean", "fantine", "cosette", "eponine", "azelma"]
14+
_ORIGINS_OF_PREDEFINED_VOICES = {
15+
"cosette": "hf://kyutai/tts-voices/expresso/ex04-ex02_confused_001_channel1_499s.wav",
16+
"marius": "hf://kyutai/tts-voices/voice-donations/Selfie.wav",
17+
"javert": "hf://kyutai/tts-voices/voice-donations/Butter.wav",
18+
"alba": "hf://kyutai/tts-voices/alba-mackenna/casual.wav",
19+
"jean": "hf://kyutai/tts-voices/ears/p010/freeform_speech_01_enhanced.wav",
20+
"anna": "hf://kyutai/tts-voices/vctk/p228_023_enhanced.wav",
21+
"vera": "hf://kyutai/tts-voices/vctk/p229_023_enhanced.wav",
22+
"fantine": "hf://kyutai/tts-voices/vctk/p244_023_enhanced.wav",
23+
"charles": "hf://kyutai/tts-voices/vctk/p254_023_enhanced.wav",
24+
"paul": "hf://kyutai/tts-voices/vctk/p259_023_enhanced.wav",
25+
"eponine": "hf://kyutai/tts-voices/vctk/p262_023_enhanced.wav",
26+
"azelma": "hf://kyutai/tts-voices/vctk/p303_023_enhanced.wav",
27+
"george": "hf://kyutai/tts-voices/vctk/p315_023_enhanced.wav",
28+
"mary": "hf://kyutai/tts-voices/vctk/p333_023_enhanced.wav",
29+
"jane": "hf://kyutai/tts-voices/vctk/p339_023_enhanced.wav",
30+
"michael": "hf://kyutai/tts-voices/vctk/p360_023_enhanced.wav",
31+
"eve": "hf://kyutai/tts-voices/vctk/p361_023_enhanced.wav",
32+
"bill_boerst": "hf://kyutai/tts-voices/voice-zero/bill_boerst.wav",
33+
"peter_yearsley": "hf://kyutai/tts-voices/voice-zero/peter_yearsley.wav",
34+
"stuart_bell": "hf://kyutai/tts-voices/voice-zero/stuart_bell.wav",
35+
"caro_davy": "hf://kyutai/tts-voices/voice-zero/caro_davy.wav",
36+
}
37+
1538
PREDEFINED_VOICES = {
1639
# don't forget to update the embeddings version and revision hash below when the voices change
17-
x: f"hf://kyutai/pocket-tts-without-voice-cloning/embeddings_v2/{x}.safetensors@2578fed2380333b621689eaed6fe144cf69dfeb3"
18-
for x in _voices_names
40+
x: f"hf://kyutai/pocket-tts-without-voice-cloning/embeddings_v3/{x}.safetensors@075c0abfe7e41450521b0200b5168cfbc16bc77b"
41+
for x in _ORIGINS_OF_PREDEFINED_VOICES
1942
}
2043

2144

scripts/generate_default_voices.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
# Export a voice state for every predefined voice and render a short sample
# clip from each exported state.
import os

import scipy.io.wavfile

from pocket_tts import TTSModel, export_model_state
from pocket_tts.utils.utils import _ORIGINS_OF_PREDEFINED_VOICES

# Output locations, relative to the current working directory.
VOICES_DIR = "./built-in-voices"
SAMPLES_DIR = "./built-in-voices-generated"

# Sentence spoken by every voice in the generated sample clips.
SAMPLE_TEXT = "Hello, it's a good day, isn't it? I hope you are doing well."

# scipy.io.wavfile.write opens its target path directly and does not create
# missing parent directories, so make sure both output folders exist first.
# NOTE(review): export_model_state may already create its parent directory;
# creating it here is harmless either way.
os.makedirs(VOICES_DIR, exist_ok=True)
os.makedirs(SAMPLES_DIR, exist_ok=True)

model = TTSModel.load_model()

for voice_name, voice_origin in _ORIGINS_OF_PREDEFINED_VOICES.items():
    print(f"Processing voice: {voice_name} from origin: {voice_origin}")
    state_path = f"{VOICES_DIR}/{voice_name}.safetensors"

    # Export a voice state for fast loading later
    model_state = model.get_state_for_audio_prompt(voice_origin)
    export_model_state(model_state, state_path)

    # Load the state back from the file that was just written, so the sample
    # is generated from the exported file rather than the in-memory state.
    model_state_copy = model.get_state_for_audio_prompt(state_path)

    audio = model.generate_audio(model_state_copy, SAMPLE_TEXT)
    scipy.io.wavfile.write(
        f"{SAMPLES_DIR}/{voice_name}.wav", model.sample_rate, audio.numpy()
    )

scripts/generate_default_voices.sh

Lines changed: 0 additions & 26 deletions
This file was deleted.

0 commit comments

Comments
 (0)