Skip to content

Commit df1876c

Browse files
feat: add Fish Audio TTS support (#2434)
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
1 parent b86a116 commit df1876c

File tree

5 files changed

+151
-2
lines changed

5 files changed

+151
-2
lines changed

GUI/settings.html

Lines changed: 101 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,14 +253,60 @@
253253
<div class="col-8">
254254
<select name="voice_choice" class="form-select" data-toggle="tooltip"
255255
data-original-title='The voice platform used for TTS generation'>
256-
<option value="streamlabspolly">Streamlabspolly</option>
257256
<option value="tiktok">TikTok</option>
257+
<option value="fishaudio">Fish Audio</option>
258+
<option value="streamlabspolly">Streamlabs Polly</option>
258259
<option value="googletranslate">Google Translate</option>
259260
<option value="awspolly">AWS Polly</option>
260261
<option value="pyttsx">Python TTS (pyttsx)</option>
261262
</select>
262263
</div>
263264
</div>
265+
<!-- Fish Audio: API key. The input carries an id so the label's `for` actually targets it. -->
<div class="row mb-2">
    <label for="fishaudio_api_key" class="col-4">Fish Audio API Key</label>
    <div class="col-8">
        <div class="input-group">
            <div class="input-group-text">
                <i class="bi bi-key-fill"></i>
            </div>
            <input value="{{ data.fishaudio_api_key }}" id="fishaudio_api_key" name="fishaudio_api_key" type="text" class="form-control"
                data-toggle="tooltip"
                data-original-title="Fish Audio API key for TTS generation. Get one at fish.audio">
        </div>
    </div>
</div>
<!-- Fish Audio: voice picker. The <select> has no name, so only the hidden
     `fishaudio_voice` input below is submitted with the form. -->
<div class="row mb-2">
    <label for="fishaudio_voice_select" class="col-4">Fish Audio Voice</label>
    <div class="col-8">
        <select id="fishaudio_voice_select" class="form-select" data-toggle="tooltip"
            data-original-title='Select a preset voice or choose Custom to enter your own'
            onchange="handleFishAudioVoiceChange(this)">
            <option value="bf322df2096a46f18c579d0baa36f41d">Adrian (Default)</option>
            <option value="8ef4a238714b45718ce04243307c57a7">E-girl</option>
            <option value="802e3bc2b27e49c2995d23ef70e6ac89">Energetic Male</option>
            <option value="933563129e564b19a115bedd57b7406a">Sarah</option>
            <option value="b347db033a6549378b48d00acb0d06cd">Selene</option>
            <option value="536d3a5e000945adb7038665781a4aca">Ethan</option>
            <option value="custom">Custom Voice ID...</option>
        </select>
        <input type="hidden" name="fishaudio_voice" id="fishaudio_voice" value="{{ data.fishaudio_voice }}">
    </div>
</div>
<!-- Fish Audio: free-form voice ID, hidden until "Custom Voice ID..." is selected. -->
<div class="row mb-2" id="fishaudio_custom_row" style="display: none;">
    <label for="fishaudio_custom_voice" class="col-4">Custom Voice ID</label>
    <div class="col-8">
        <div class="input-group">
            <div class="input-group-text">
                <i class="bi bi-soundwave"></i>
            </div>
            <input id="fishaudio_custom_voice" type="text" class="form-control"
                placeholder="Enter voice ID from fish.audio"
                data-toggle="tooltip"
                data-original-title="Voice ID from fish.audio - find it in the URL of any voice">
        </div>
        <!-- rel="noopener noreferrer": the external page must not get a window.opener handle. -->
        <span class="form-text text-muted"><a href="https://fish.audio/discovery" target="_blank" rel="noopener noreferrer">Browse voices at fish.audio/discovery</a></span>
    </div>
</div>
264310
<div class="row mb-2">
265311
<label for="aws_polly_voice" class="col-4">AWS Polly Voice</label>
266312
<div class="col-8">
@@ -432,6 +478,22 @@
432478
</main>
433479

434480
<script>
481+
/**
 * Inline `onchange` handler for the Fish Audio voice dropdown.
 *
 * Keeps the hidden `#fishaudio_voice` field (the value that is submitted with
 * the form) in sync with the dropdown and toggles the custom-voice row.
 *
 * - Preset selected: hide the custom row and store the preset ID.
 * - "custom" selected: show the custom row, and if the custom text box already
 *   holds an ID, store it immediately. Without this the hidden field would keep
 *   the previously chosen preset until the user typed again.
 *   An empty custom box leaves the hidden field untouched so a valid voice ID
 *   is never replaced by an empty string.
 *
 * Must remain a global function: it is referenced from an inline `onchange`
 * attribute.
 *
 * @param {{ value: string }} selectElement - The `<select>` that changed.
 * @returns {void}
 */
function handleFishAudioVoiceChange(selectElement) {
  const customRow = document.getElementById('fishaudio_custom_row');
  const hiddenInput = document.getElementById('fishaudio_voice');
  const customInput = document.getElementById('fishaudio_custom_voice');

  if (selectElement.value !== 'custom') {
    customRow.style.display = 'none';
    hiddenInput.value = selectElement.value;
    return;
  }

  customRow.style.display = '';

  // Re-apply an ID that is already sitting in the text box (e.g. the user
  // switched custom -> preset -> custom without retyping).
  const customId = customInput.value.trim();
  if (customId !== '') {
    hiddenInput.value = customId;
  }

  customInput.focus();
}
496+
435497
// Test voices buttons
436498
var playing = false;
437499

@@ -544,6 +606,44 @@
544606
validate($(this));
545607
});
546608

609+
// Fish Audio voice selector logic.
// The visible <select> and the custom text box are UI only; the hidden
// #fishaudio_voice input is the single source of truth that gets submitted.
const presetVoices = [
  "bf322df2096a46f18c579d0baa36f41d",
  "8ef4a238714b45718ce04243307c57a7",
  "802e3bc2b27e49c2995d23ef70e6ac89",
  "933563129e564b19a115bedd57b7406a",
  "b347db033a6549378b48d00acb0d06cd",
  "536d3a5e000945adb7038665781a4aca",
];

// Initialize Fish Audio voice on page load.
// NOTE(review): `data` is expected to be provided by the surrounding script
// (not visible in this hunk) - confirm it is in scope here.
// `||` is intentional: an empty saved voice must also fall back to the default.
const savedVoice = data.fishaudio_voice || "bf322df2096a46f18c579d0baa36f41d";
if (presetVoices.includes(savedVoice)) {
  $("#fishaudio_voice_select").val(savedVoice);
} else {
  // Anything that is not a known preset is treated as a user-supplied ID.
  $("#fishaudio_voice_select").val("custom");
  $("#fishaudio_custom_row").show();
  $("#fishaudio_custom_voice").val(savedVoice);
}
$("#fishaudio_voice").val(savedVoice);

// Handle dropdown change
$("#fishaudio_voice_select").on("change", function () {
  const selected = $(this).val();
  if (selected === "custom") {
    $("#fishaudio_custom_row").show();
    // Re-apply an ID already present in the text box; otherwise the hidden
    // field would keep the previously selected preset until the user types.
    const customId = String($("#fishaudio_custom_voice").val() ?? "").trim();
    if (customId !== "") {
      $("#fishaudio_voice").val(customId);
    }
    $("#fishaudio_custom_voice").focus();
  } else {
    $("#fishaudio_custom_row").hide();
    $("#fishaudio_voice").val(selected);
  }
});

// Handle custom input change (trimmed so pasted IDs with stray whitespace work)
$("#fishaudio_custom_voice").on("input", function () {
  $("#fishaudio_voice").val(String($(this).val() ?? "").trim());
});
646+
547647
function validate(object) {
548648
let bool = check(object.prop("name"), object.prop("value"));
549649

TTS/fishaudio.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import random
2+
3+
from fishaudio import FishAudio as FishAudioClient
4+
from fishaudio.utils import save
5+
6+
from utils import settings
7+
8+
9+
class FishAudio:
    """Text-to-speech provider backed by the Fish Audio SDK.

    The SDK client is created lazily on the first ``run`` call so that a
    missing API key only raises when this provider is actually used.
    """

    # Fallback voice ("Adrian") used when the configured voice ID is missing
    # or blank; same ID as the default in the config template.
    DEFAULT_VOICE = "bf322df2096a46f18c579d0baa36f41d"

    def __init__(self):
        # Upper bound on characters per request.
        # NOTE(review): presumably read by the TTS engine wrapper to split
        # long text - the consumer is not visible here.
        self.max_chars = 5000
        self.client = None
        # Preset voice IDs that ``randomvoice`` picks from.
        self.voices = [
            "8ef4a238714b45718ce04243307c57a7",  # E-girl
            "802e3bc2b27e49c2995d23ef70e6ac89",  # Energetic Male
            "933563129e564b19a115bedd57b7406a",  # Sarah
            "bf322df2096a46f18c579d0baa36f41d",  # Adrian
            "b347db033a6549378b48d00acb0d06cd",  # Selene
            "536d3a5e000945adb7038665781a4aca",  # Ethan
        ]

    def run(self, text, filepath, random_voice: bool = False):
        """Synthesize ``text`` and write the resulting audio to ``filepath``.

        Args:
            text: The text to convert to speech.
            filepath: Destination path handed to the SDK's ``save`` helper.
            random_voice: When true, use a random preset voice instead of the
                configured ``fishaudio_voice``.

        Raises:
            ValueError: If no Fish Audio API key is configured.
        """
        if self.client is None:
            self.initialize()

        if random_voice:
            voice_id = self.randomvoice()
        else:
            # ``.get`` + fallback: a config without this key, a ``None`` value
            # or a blank string must not reach the API as "" / "None".
            configured = settings.config["settings"]["tts"].get("fishaudio_voice")
            voice_id = str(configured).strip() if configured else ""
            if not voice_id:
                voice_id = self.DEFAULT_VOICE

        audio = self.client.tts.convert(text=text, reference_id=voice_id)
        save(audio, filepath)

    def initialize(self):
        """Create the Fish Audio client from the configured API key.

        Raises:
            ValueError: If ``fishaudio_api_key`` is missing or blank.
        """
        api_key = settings.config["settings"]["tts"].get("fishaudio_api_key")
        # Treat whitespace-only keys as missing as well.
        api_key = str(api_key).strip() if api_key else ""
        if not api_key:
            raise ValueError(
                "You didn't set a Fish Audio API key! Please set the config variable fishaudio_api_key to a valid API key."
            )

        self.client = FishAudioClient(api_key=api_key)

    def randomvoice(self):
        """Return a random voice ID from the preset list."""
        return random.choice(self.voices)

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
boto3==1.36.8
22
botocore==1.36.8
3+
fish-audio-sdk==1.1.0
34
gTTS==2.5.4
45
moviepy==2.2.1
56
playwright==1.49.1

utils/.config.template.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,12 @@ background_thumbnail_font_size = { optional = true, type = "int", default = 96,
4444
background_thumbnail_font_color = { optional = true, default = "255,255,255", example = "255,255,255", explanation = "Font color in RGB format for the thumbnail text" }
4545

4646
[settings.tts]
47-
voice_choice = { optional = false, default = "tiktok", options = ["elevenlabs", "streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", "OpenAI"], example = "tiktok", explanation = "The voice platform used for TTS generation. " }
47+
voice_choice = { optional = false, default = "tiktok", options = ["elevenlabs", "fishaudio", "streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", "OpenAI"], example = "tiktok", explanation = "The voice platform used for TTS generation. " }
4848
random_voice = { optional = false, type = "bool", default = true, example = true, options = [true, false,], explanation = "Randomizes the voice used for each comment" }
4949
elevenlabs_voice_name = { optional = false, default = "Bella", example = "Bella", explanation = "The voice used for elevenlabs", options = ["Adam", "Antoni", "Arnold", "Bella", "Domi", "Elli", "Josh", "Rachel", "Sam", ] }
5050
elevenlabs_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Elevenlabs API key" }
51+
fishaudio_api_key = { optional = true, example = "your_fish_audio_api_key", explanation = "Fish Audio API key for TTS generation" }
52+
fishaudio_voice = { optional = false, default = "bf322df2096a46f18c579d0baa36f41d", example = "bf322df2096a46f18c579d0baa36f41d", explanation = "The voice ID used for Fish Audio TTS. Find voice IDs at fish.audio/discovery" }
5153
aws_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for AWS Polly" }
5254
streamlabs_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for Streamlabs Polly" }
5355
tiktok_voice = { optional = true, default = "en_us_001", example = "en_us_006", explanation = "The voice used for TikTok TTS" }

video_creation/voices.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from TTS.aws_polly import AWSPolly
66
from TTS.elevenlabs import elevenlabs
77
from TTS.engine_wrapper import TTSEngine
8+
from TTS.fishaudio import FishAudio
89
from TTS.GTTS import GTTS
910
from TTS.openai_tts import OpenAITTS
1011
from TTS.pyttsx import pyttsx
@@ -23,6 +24,7 @@
2324
"pyttsx": pyttsx,
2425
"ElevenLabs": elevenlabs,
2526
"OpenAI": OpenAITTS,
27+
"FishAudio": FishAudio,
2628
}
2729

2830

0 commit comments

Comments
 (0)