Added code to generate the default voices (#144)

gabrieldemarmiesse · web-flow · commit 25e1b673cd4f · 2026-03-12T18:12:47.000+01:00
diff --git a/pocket_tts/static/index.html b/pocket_tts/static/index.html
@@ -45,7 +45,8 @@ <h1 class="text-xl font-bold text-center">Pocket TTS</h1>
             <p class="text-xs text-gray-500 mt-1">
                 Supports: http://, https://, or hf:// URLs.<br>
                 You can also use predefined voices:<br>
-                "alba", "marius", "javert", "jean", "fantine", "cosette", "eponine", "azelma".
+                "alba", "anna", "azelma", "bill_boerst", "caro_davy", "charles", "cosette", "eponine", "eve",
+                "fantine", "george", "jane", "javert", "jean", "marius", "mary", "michael", "paul", "peter_yearsley", "stuart_bell", "vera".
                 <br/>
                 You can find more voices in our
                 <a
diff --git a/pocket_tts/utils/utils.py b/pocket_tts/utils/utils.py
@@ -11,11 +11,34 @@
 
 PROJECT_ROOT = Path(__file__).parent.parent.parent
 
-_voices_names = ["alba", "marius", "javert", "jean", "fantine", "cosette", "eponine", "azelma"]
+_ORIGINS_OF_PREDEFINED_VOICES = {  # predefined voice name -> hf:// source audio the voice is generated from
+    "cosette": "hf://kyutai/tts-voices/expresso/ex04-ex02_confused_001_channel1_499s.wav",
+    "marius": "hf://kyutai/tts-voices/voice-donations/Selfie.wav",
+    "javert": "hf://kyutai/tts-voices/voice-donations/Butter.wav",
+    "alba": "hf://kyutai/tts-voices/alba-mackenna/casual.wav",
+    "jean": "hf://kyutai/tts-voices/ears/p010/freeform_speech_01_enhanced.wav",
+    "anna": "hf://kyutai/tts-voices/vctk/p228_023_enhanced.wav",
+    "vera": "hf://kyutai/tts-voices/vctk/p229_023_enhanced.wav",
+    "fantine": "hf://kyutai/tts-voices/vctk/p244_023_enhanced.wav",
+    "charles": "hf://kyutai/tts-voices/vctk/p254_023_enhanced.wav",
+    "paul": "hf://kyutai/tts-voices/vctk/p259_023_enhanced.wav",
+    "eponine": "hf://kyutai/tts-voices/vctk/p262_023_enhanced.wav",
+    "azelma": "hf://kyutai/tts-voices/vctk/p303_023_enhanced.wav",
+    "george": "hf://kyutai/tts-voices/vctk/p315_023_enhanced.wav",
+    "mary": "hf://kyutai/tts-voices/vctk/p333_023_enhanced.wav",
+    "jane": "hf://kyutai/tts-voices/vctk/p339_023_enhanced.wav",
+    "michael": "hf://kyutai/tts-voices/vctk/p360_023_enhanced.wav",
+    "eve": "hf://kyutai/tts-voices/vctk/p361_023_enhanced.wav",
+    "bill_boerst": "hf://kyutai/tts-voices/voice-zero/bill_boerst.wav",
+    "peter_yearsley": "hf://kyutai/tts-voices/voice-zero/peter_yearsley.wav",
+    "stuart_bell": "hf://kyutai/tts-voices/voice-zero/stuart_bell.wav",
+    "caro_davy": "hf://kyutai/tts-voices/voice-zero/caro_davy.wav",
+}
+
 PREDEFINED_VOICES = {
     # don't forget to change this
-    x: f"hf://kyutai/pocket-tts-without-voice-cloning/embeddings_v2/{x}.safetensors@2578fed2380333b621689eaed6fe144cf69dfeb3"
-    for x in _voices_names
+    x: f"hf://kyutai/pocket-tts-without-voice-cloning/embeddings_v3/{x}.safetensors@075c0abfe7e41450521b0200b5168cfbc16bc77b"
+    for x in _ORIGINS_OF_PREDEFINED_VOICES  # iterating the dict yields its keys, i.e. the voice names
 }
 
 
diff --git a/scripts/generate_default_voices.py b/scripts/generate_default_voices.py
@@ -0,0 +1,39 @@
+"""Export a voice state for every predefined voice and render a sample from it.
+
+For each entry of ``_ORIGINS_OF_PREDEFINED_VOICES`` the state is written to
+``./built-in-voices/<name>.safetensors`` and a short sample generated from the
+reloaded file is written to ``./built-in-voices-generated/<name>.wav``.
+"""
+
+import os
+
+import scipy.io.wavfile
+
+from pocket_tts import TTSModel, export_model_state
+from pocket_tts.utils.utils import _ORIGINS_OF_PREDEFINED_VOICES
+
+# scipy.io.wavfile.write opens its target path directly and fails when the
+# directory is missing (export_model_state is assumed to behave the same), so
+# create both output directories up front, before the model is loaded.
+os.makedirs("./built-in-voices", exist_ok=True)
+os.makedirs("./built-in-voices-generated", exist_ok=True)
+
+model = TTSModel.load_model()
+
+for voice_name, voice_origin in _ORIGINS_OF_PREDEFINED_VOICES.items():
+    print(f"Processing voice: {voice_name} from origin: {voice_origin}")
+    # Export a voice state for fast loading later
+    model_state = model.get_state_for_audio_prompt(voice_origin)
+    export_model_state(model_state, f"./built-in-voices/{voice_name}.safetensors")
+
+    # Reload the state from the file just written so the sample below is
+    # produced from the exported artifact rather than the in-memory state.
+    model_state_copy = model.get_state_for_audio_prompt(
+        f"./built-in-voices/{voice_name}.safetensors"
+    )
+
+    audio = model.generate_audio(
+        model_state_copy, "Hello, it's a good day, isn't it? I hope you are doing well."
+    )
+    scipy.io.wavfile.write(
+        f"./built-in-voices-generated/{voice_name}.wav", model.sample_rate, audio.numpy()
+    )
diff --git a/scripts/generate_default_voices.sh b/scripts/generate_default_voices.sh