Skip to content

Commit 5416b28

Browse files
Update TTS Server
- Fix requirements.txt
- Fix run_server.bat
- Enable debug output in main.py
- Improve main.py code readability
1 parent 649e0c7 commit 5416b28

File tree

3 files changed

+43
-96
lines changed

3 files changed

+43
-96
lines changed

batch/run_server.bat

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,6 @@
11
@echo off
22
cd /d %~dp0\Server
33

4-
echo *************************************************
5-
echo UnityNeuroSpeech Text-To-Speech server is running!
6-
echo UnityNeuroSpeech official GitHub repository: https://github.com/HardCodeDev777/UnityNeuroSpeech
7-
echo *************************************************
8-
9-
call .venv\Scripts\activate
10-
python main.py
4+
.venv\Scripts\python.exe main.py
115

126
pause

server/main.py

Lines changed: 30 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,13 @@
11
import torch, io, sys, os, logging, warnings
22
from flask import Flask, request, Response, render_template
33
from langdetect import detect
4-
from torch.serialization import add_safe_globals
54
from TTS.api import TTS
6-
from TTS.tts.configs.xtts_config import XttsConfig
7-
from TTS.tts.models.xtts import XttsAudioConfig, XttsArgs
8-
from TTS.config.shared_configs import BaseDatasetConfig
9-
add_safe_globals([XttsConfig, XttsAudioConfig, XttsArgs, BaseDatasetConfig])
105

11-
warnings.simplefilter(action='ignore', category=FutureWarning)
12-
sys.stdout = open(os.devnull, 'w')
13-
logging.disable(logging.CRITICAL)
6+
# warnings.simplefilter(action='ignore', category=FutureWarning)
7+
# sys.stdout = open(os.devnull, 'w')
8+
# logging.disable(logging.CRITICAL)
9+
10+
print(f"Python executable(for gebug): {sys.executable}")
1411

1512
device = "cuda" if torch.cuda.is_available() else "cpu"
1613
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -19,25 +16,29 @@
1916
tts = TTS(model_path=MODEL_PATH, config_path=CONFIG_PATH, progress_bar=False)
2017
tts.to(device)
2118

22-
EN_VOICE = "./Voices/en_voice.wav"
23-
ES_VOICE = "./Voices/es_voice.wav"
24-
FR_VOICE = "./Voices/fr_voice.wav"
25-
DE_VOICE = "./Voices/de_voice.wav"
26-
IT_VOICE = "./Voices/it_voice.wav"
27-
PT_VOICE = "./Voices/pt_voice.wav"
28-
PL_VOICE = "./Voices/pl_voice.wav"
29-
TR_VOICE = "./Voices/tr_voice.wav"
30-
RU_VOICE = "./Voices/ru_voice.wav"
31-
NL_VOICE = "./Voices/nl_voice.wav"
32-
CS_VOICE = "./Voices/cs_voice.wav"
33-
AR_VOICE = "./Voices/ar_voice.wav"
34-
ZH_CN_VOICE = "./Voices/zh_cn_voice.wav"
35-
HU_VOICE = "./Voices/hu_voice.wav"
36-
KO_VOICE = "./Voices/ko_voice.wav"
37-
JA_VOICE = "./Voices/ja_voice.wav"
38-
HI_VOICE = "./Voices/hi_voice.wav"
19+
VOICES = {
20+
'en': "./Voices/en_voice.wav",
21+
'es': "./Voices/es_voice.wav",
22+
'fr': "./Voices/fr_voice.wav",
23+
'de': "./Voices/de_voice.wav",
24+
'it': "./Voices/it_voice.wav",
25+
'pt': "./Voices/pt_voice.wav",
26+
'pl': "./Voices/pl_voice.wav",
27+
'tr': "./Voices/tr_voice.wav",
28+
'ru': "./Voices/ru_voice.wav",
29+
'nl': "./Voices/nl_voice.wav",
30+
'cs': "./Voices/cs_voice.wav",
31+
'ar': "./Voices/ar_voice.wav",
32+
'zh-cn': "./Voices/zh_cn_voice.wav",
33+
'hu': "./Voices/hu_voice.wav",
34+
'ko': "./Voices/ko_voice.wav",
35+
'ja': "./Voices/ja_voice.wav",
36+
'hi': "./Voices/hi_voice.wav"
37+
}
38+
3939
app = Flask(__name__)
4040

41+
4142
@app.route('/')
4243
def index():
4344
return render_template("index.html")
@@ -49,43 +50,9 @@ def speak():
4950
lang = detect(text)
5051
except:
5152
lang = "en"
52-
if lang not in ['en', 'es', 'fr', 'de', 'it', 'pt', 'pl', 'tr', 'ru', 'nl', 'cs', 'ar', 'zh-cn', 'hu', 'ko', 'ja', 'hi']:
53-
lang = "en"
5453

55-
if(lang == "en"):
56-
speaker_file = EN_VOICE
57-
elif(lang == "es"):
58-
speaker_file = ES_VOICE
59-
elif(lang == "fr"):
60-
speaker_file = FR_VOICE
61-
elif(lang == "de"):
62-
speaker_file = DE_VOICE
63-
elif(lang == "it"):
64-
speaker_file = IT_VOICE
65-
elif(lang == "pt"):
66-
speaker_file = PT_VOICE
67-
elif(lang == "pl"):
68-
speaker_file = PL_VOICE
69-
elif(lang == "tr"):
70-
speaker_file = TR_VOICE
71-
elif(lang == "ru"):
72-
speaker_file = RU_VOICE
73-
elif(lang == "nl"):
74-
speaker_file = NL_VOICE
75-
elif(lang == "cs"):
76-
speaker_file = CS_VOICE
77-
elif(lang == "ar"):
78-
speaker_file = AR_VOICE
79-
elif(lang == "zh-cn"):
80-
speaker_file = ZH_CN_VOICE
81-
elif(lang == "hu"):
82-
speaker_file = HU_VOICE
83-
elif(lang == "ko"):
84-
speaker_file = KO_VOICE
85-
elif(lang == "ja"):
86-
speaker_file = JA_VOICE
87-
elif(lang == "hi"):
88-
speaker_file = HI_VOICE
54+
# Default to English if language not supported
55+
speaker_file = VOICES.get(lang, VOICES['en'])
8956

9057
buf = io.BytesIO()
9158
with torch.inference_mode():
@@ -101,5 +68,6 @@ def speak():
10168

10269
return Response(data, mimetype="audio/wav")
10370

104-
if __name__=="__main__":
71+
72+
if __name__ == "__main__":
10573
app.run(port=7777, threaded=True)

server/requirements.txt

Lines changed: 12 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,45 @@
1-
# core deps
2-
numpy==1.22.0;python_version<="3.10"
3-
numpy>=1.24.3;python_version>"3.10"
1+
# Core dependencies
2+
torch==2.1.0
3+
torchaudio==2.1.0
4+
transformers==4.33.0
5+
TTS==0.21.2
6+
7+
# Additional critical dependencies
8+
numpy>=1.24.3
49
cython>=0.29.30
510
scipy>=1.11.2
6-
torch>=2.1
7-
torchaudio
811
soundfile>=0.12.0
912
librosa>=0.10.0
1013
scikit-learn>=1.3.0
11-
numba==0.55.1;python_version<"3.9"
12-
numba>=0.57.0;python_version>="3.9"
14+
numba>=0.57.0
1315
inflect>=5.6.0
1416
tqdm>=4.64.1
1517
anyascii>=0.3.0
1618
pyyaml>=6.0
17-
fsspec>=2023.6.0 # <= 2023.9.1 makes aux tests fail
19+
fsspec>=2023.6.0
1820
aiohttp>=3.8.1
1921
packaging>=23.1
2022
mutagen==1.47.0
21-
# deps for examples
2223
flask>=2.0.1
23-
# deps for inference
2424
pysbd>=0.3.4
25-
# deps for notebooks
2625
umap-learn>=0.5.1
2726
pandas>=1.4,<2.0
28-
# deps for training
2927
matplotlib>=3.7.0
30-
# coqui stack
3128
trainer>=0.0.36
32-
# config management
3329
coqpit>=0.0.16
34-
# chinese g2p deps
3530
jieba
3631
pypinyin
37-
# korean
3832
hangul_romanize
39-
# gruut+supported langs
4033
gruut[de,es,fr]==2.2.3
41-
# deps for korean
4234
jamo
4335
nltk
4436
g2pkk>=0.1.1
45-
# deps for bangla
4637
bangla
4738
bnnumerizer
4839
bnunicodenormalizer
49-
#deps for tortoise
5040
einops>=0.6.0
51-
52-
# UnityNeuroSpeech's developer note - "I HATE IT"
53-
transformers==4.33.0
54-
55-
#deps for bark
5641
encodec>=0.1.1
57-
# deps for XTTS
5842
unidecode>=1.3.2
5943
num2words
60-
spacy[ja]>=3
44+
spacy[ja]>=3
45+
langdetect>=1.0.9

0 commit comments

Comments
 (0)