Skip to content

Commit 57a6c0b

Browse files
authored
Merge pull request #4 from overcrash66/develop
add TTS tool
2 parents 406a3e2 + 60c5a9b commit 57a6c0b

File tree

4 files changed

+120
-0
lines changed

4 files changed

+120
-0
lines changed

Screenshot.png

-73.3 KB
Loading

Screenshot3.png

84 KB
Loading

readme.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@
2222
- PyTranscriber (shortcut)
2323
- Exit
2424

25+
## Demo:
26+
27+
[![Open Translator Intro](https://img.youtube.com/vi/GIhtXs8T8FA/0.jpg)](https://www.youtube.com/watch?v=GIhtXs8T8FA)
28+
29+
[![Open Translator GUI demo](https://img.youtube.com/vi/_5SoStnGqIw/0.jpg)](https://www.youtube.com/watch?v=_5SoStnGqIw)
30+
2531
## Requirements
2632

2733
Make sure you have the following dependencies installed:
@@ -101,6 +107,8 @@ python WebUI.py
101107

102108
![Web](Screenshot.png)
103109

110+
![TTS](Screenshot3.png)
111+
104112
## Audio translation examples:
105113
- [ar-test.mp3](./testResults/ar-test.mp3)
106114
- [cn-test.mp3](./testResults/cn-test.mp3)

textToSpeech.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import gradio as gr
2+
from TTS.api import TTS
3+
from datetime import datetime
4+
import time
5+
6+
class TTSVoiceCloningTool:
    """On-demand wrapper around the XTTS v2 model exposed by ``TTS.api.TTS``.

    The model is loaded at the start of each operation and released again
    when the operation finishes, whether it succeeded or raised.
    """

    def __init__(self):
        # No model is held until an operation asks for one.
        self.tts = None

    def load_tts_model(self):
        """Load the TTS model."""
        self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")

    def unload_tts_model(self):
        """Unload the TTS model to free memory."""
        # Rebinding drops this object's reference to the model; a separate
        # ``del`` beforehand is not needed.
        self.tts = None

    def get_supported_languages(self):
        """Return a list of supported languages.

        The model is loaded for the query and always unloaded afterwards.
        """
        self.load_tts_model()
        try:
            return self.tts.list_languages()
        finally:
            # Release the model even if the query raised.
            self.unload_tts_model()

    def generate_audio(self, text, output_path, target_language, input_path, speed):
        """Generate audio using TTS.

        Args:
            text: Text to synthesize.
            output_path: Destination file path for the generated audio.
            target_language: Language code passed to the model.
            input_path: Reference audio file used as the speaker sample.
            speed: Speed value forwarded to the model.

        Returns:
            ``output_path``, unchanged.

        Raises:
            Any exception raised while loading the model or synthesizing;
            the model is unloaded before the exception propagates.
        """
        print("Generating audio...")
        # perf_counter is monotonic, so the reported duration cannot be
        # skewed by wall-clock adjustments.
        start_time = time.perf_counter()

        self.load_tts_model()
        try:
            # Generate audio with adjustable speed
            self.tts.tts_to_file(
                text=text,
                speaker_wav=input_path,
                language=target_language,
                file_path=output_path,
                speed=speed,  # Pass the speed parameter
            )

            execution_time = (time.perf_counter() - start_time) / 60
            print(f"Audio generated in {execution_time:.2f} minutes")
        finally:
            # Previously skipped on failure, leaving the model attached.
            self.unload_tts_model()

        return output_path
48+
49+
def tts_interface(text, reference_audio, language, speed):
    """Wrapper for the TTS tool to integrate with Gradio.

    Args:
        text: Text to convert to speech.
        reference_audio: File path of the reference recording to clone.
        language: Target language code.
        speed: Output speed value forwarded to the TTS tool.

    Returns:
        Path of the generated audio file.

    Raises:
        gr.Error: If an input is missing or synthesis fails. This callback
            feeds an audio output, so a returned error string would be
            interpreted as a file path; raising lets Gradio show the
            message to the user.
    """
    if not reference_audio:
        raise gr.Error("Error: Please provide a reference audio file for voice cloning.")
    # Reject blank text up front instead of loading the model for nothing.
    if not text or not text.strip():
        raise gr.Error("Error: Please enter some text to convert to speech.")

    # Generate a timestamped filename
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_path = f"Tts_{timestamp}.mp3"

    tts_tool = TTSVoiceCloningTool()
    try:
        return tts_tool.generate_audio(text, output_path, language, reference_audio, speed)
    except Exception as e:
        # UI boundary: surface any synthesis failure as a visible message.
        raise gr.Error(f"Error: {str(e)}") from e
64+
65+
# Gradio GUI
66+
def main(server_name="127.0.0.2", server_port=7862):
    """Build the Gradio voice-cloning interface and start serving it.

    Args:
        server_name: Address the Gradio server binds to. Defaults to the
            address this tool has always used.
        server_port: Port the Gradio server listens on. Defaults to the
            port this tool has always used.
    """
    # Manually specify supported languages
    supported_languages = ["en", "es", "fr", "de", "it", "pt", "nl", "ru", "zh", "ja"]  # Add or remove languages as necessary

    with gr.Blocks() as demo:
        gr.Markdown("""
        # 🎙️ TTS Voice Cloning Tool
        Convert your text into speech using voice cloning! Provide a reference audio to mimic the voice, select the target language, and adjust the output speed.
        """)

        # Row 1: the text to synthesize.
        with gr.Row():
            text_input = gr.Textbox(
                label="Enter Text to Convert to Speech",
                placeholder="Type or paste the text you want to convert to speech here...",
                lines=8,
                max_lines=20,
            )

        # Row 2: voice sample, language and speed controls.
        with gr.Row():
            reference_audio_input = gr.Audio(label="Reference Audio File", type="filepath")
            language_input = gr.Dropdown(
                label="Target Language",
                choices=supported_languages,
                value="en",  # Default to English
            )
            speed_input = gr.Slider(
                label="Output Audio Speed",
                minimum=0.5,
                maximum=2.0,
                value=1.0,  # Default speed
                step=0.1,
            )

        output_audio = gr.Audio(label="Generated Audio", type="filepath")

        submit_button = gr.Button(value="🎧 Generate Audio", variant="primary")

        # The callback receives the inputs in exactly this order.
        submit_button.click(
            tts_interface,
            inputs=[text_input, reference_audio_input, language_input, speed_input],
            outputs=[output_audio],
        )

    demo.launch(server_name=server_name, server_port=server_port)
110+
111+
# Script entry point: start the UI only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)