Skip to content

Commit 3e1560b

Browse files
committed
test TTS runner, fix bugs, optimize code
1 parent 1c1fba4 commit 3e1560b

File tree

17 files changed

+723
-171
lines changed

17 files changed

+723
-171
lines changed

configs/tts_infer.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
custom:
2+
bert_base_path: models/pretrained/chinese-roberta-wwm-ext-large
3+
cnhuhbert_base_path: models/pretrained/chinese-hubert-base
4+
device: cpu
5+
is_half: false
6+
t2s_weights_path: models/pretrained/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
7+
vits_weights_path: models/pretrained/gsv-v2final-pretrained/s2G2333k.pth
8+
default:
9+
bert_base_path: models/pretrained/chinese-roberta-wwm-ext-large
10+
cnhuhbert_base_path: models/pretrained/chinese-hubert-base
11+
device: cpu
12+
is_half: false
13+
t2s_weights_path: models/pretrained/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
14+
vits_weights_path: models/pretrained/gsv-v2final-pretrained/s2G2333k.pth

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,5 @@ dependencies = [
3838
"pyjyutping",
3939
"cn2an",
4040
"python-mecab-ko",
41+
"matplotlib",
4142
]

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,4 @@ funasr==1.0.27
4141
torchaudio
4242
python-mecab-ko
4343
opencc
44+
matplotlib

src/easevoice/configs/.gitignore

Lines changed: 0 additions & 1 deletion
This file was deleted.

src/easevoice/configs/s2.json

Lines changed: 0 additions & 90 deletions
This file was deleted.

src/easevoice/inference/__init__.py

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import os
66
import logging
77

8-
from ...utils.config.config import GlobalCFG
8+
from ...utils.path import get_base_path
99
from ...logger import logger
1010

1111

@@ -22,7 +22,7 @@
2222

2323
@dataclasses.dataclass
2424
class InferenceResult:
25-
items: list = []
25+
items: list = dataclasses.field(default_factory=list)
2626
seed: int = -1
2727
error: Optional[str] = None
2828

@@ -35,7 +35,7 @@ class InferenceTaskData:
3535
prompt_text: str
3636
prompt_lang: str
3737
text_split_method: str
38-
aux_ref_audio_paths: list = []
38+
aux_ref_audio_paths: list = dataclasses.field(default_factory=list)
3939
seed = -1
4040
top_k = 5
4141
top_p = 1
@@ -65,20 +65,7 @@ class Runner:
6565
"""
6666

6767
def __init__(self, queue: multiprocessing.Queue):
68-
cfg = GlobalCFG()
69-
70-
gpt_path = os.environ.get("gpt_path", None)
71-
sovits_path = os.environ.get("sovits_path", None)
72-
cnhubert_base_path = os.environ.get("cnhubert_base_path", None)
73-
bert_path = os.environ.get("bert_path", None)
74-
75-
tts_config = TTSConfig("GPT_SoVITS/configs/tts_infer.yaml")
76-
tts_config.device = cfg.device
77-
tts_config.is_half = cfg.is_half
78-
tts_config.t2s_weights_path = gpt_path if gpt_path is not None else tts_config.t2s_weights_path
79-
tts_config.vits_weights_path = sovits_path if sovits_path is not None else tts_config.vits_weights_path
80-
tts_config.cnhuhbert_base_path = cnhubert_base_path if cnhubert_base_path is not None else tts_config.cnhuhbert_base_path
81-
tts_config.bert_base_path = bert_path if bert_path is not None else tts_config.bert_base_path
68+
tts_config = TTSConfig(os.path.join(get_base_path(), "configs", "tts_infer.yaml"))
8269
logger.info(f"tts config: {tts_config}")
8370

8471
self.tts_config = tts_config

src/easevoice/inference/tts.py

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11

22
import dataclasses
3+
4+
from ...utils.config.config import GlobalCFG
5+
from ...utils.path import get_base_path
36
from .preprocessor import TextPreprocessor
47
from .segmentation import SPLITS
5-
from module.mel_processing import spectrogram_torch
8+
from ..module.mel_processing import spectrogram_torch
69
from ...utils.audio import load_audio
710
from time import time as ttime
811
import librosa
9-
from module.models import SynthesizerTrn
12+
from ..module.models import SynthesizerTrn
1013
from ..feature_extractor.cnhubert import CNHubert
1114
from ..soundstorm.auto_reg.models.t2s_lightning_module import Text2SemanticLightningModule
1215
from transformers import AutoModelForMaskedLM, AutoTokenizer
@@ -54,16 +57,21 @@ def set_seed(seed: int):
5457
return seed
5558

5659

60+
def _get_default_configs():
61+
global_config = GlobalCFG()
62+
return {
63+
"device": global_config.device,
64+
"is_half": global_config.is_half,
65+
"t2s_weights_path": global_config.gpt_path,
66+
"vits_weights_path": global_config.sovits_path,
67+
"cnhuhbert_base_path": global_config.cnhubert_path,
68+
"bert_base_path": global_config.bert_path,
69+
}
70+
71+
5772
class TTSConfig:
5873
default_configs = {
59-
"default": {
60-
"device": "cpu",
61-
"is_half": False,
62-
"t2s_weights_path": "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt",
63-
"vits_weights_path": "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth",
64-
"cnhuhbert_base_path": "GPT_SoVITS/pretrained_models/chinese-hubert-base",
65-
"bert_base_path": "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",
66-
},
74+
"default": _get_default_configs(),
6775
}
6876
languages: list = ["auto", "auto_yue", "en", "zh", "ja", "yue", "ko", "all_zh", "all_ja", "all_yue", "all_ko"]
6977
# "all_zh",#全部按中文识别
@@ -79,6 +87,8 @@ class TTSConfig:
7987
# "auto_yue",#多语种启动切分识别语种
8088

8189
def __init__(self, configs: Union[dict, str, None] = None): # pyright: ignore
90+
global_config = GlobalCFG()
91+
8292
configs_base_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "configs")
8393
os.makedirs(configs_base_path, exist_ok=True)
8494
self.configs_path: str = os.path.join(configs_base_path, "tts_infer.yaml")
@@ -97,8 +107,8 @@ def __init__(self, configs: Union[dict, str, None] = None): # pyright: ignore
97107
self.default_configs["default"] = configs.get("default", self.default_configs["default"])
98108

99109
self.configs: dict = configs.get("custom", deepcopy(self.default_configs["default"]))
100-
self.device = self.configs.get("device", torch.device("cpu"))
101-
self.is_half = self.configs.get("is_half", False)
110+
self.device = self.configs.get("device", global_config.device)
111+
self.is_half = self.configs.get("is_half", global_config.is_half)
102112

103113
def get_path(key: str):
104114
path = self.configs.get(key, None)
@@ -180,7 +190,7 @@ def __init__(self, configs: Union[dict, str, TTSConfig]):
180190

181191
self.t2s_model: Text2SemanticLightningModule = None # pyright: ignore
182192
self.vits_model: SynthesizerTrn = None # pyright: ignore
183-
self.bert_tokenizer: = None # pyright: ignore
193+
self.bert_tokenizer: AutoTokenizer = None # pyright: ignore
184194
self.bert_model: AutoModelForMaskedLM = None # pyright: ignore
185195
self.cnhuhbert_model: CNHubert = None # pyright: ignore
186196

@@ -223,7 +233,7 @@ def init_cnhuhbert_weights(self, base_path: str):
223233

224234
def init_bert_weights(self, base_path: str):
225235
logger.info(f"Loading BERT weights from {base_path}")
226-
self.bert_tokenizer = AutoTokenizer.from_pretrained(base_path)
236+
self.bert_tokenizer = AutoTokenizer.from_pretrained(base_path) # pyright: ignore
227237
self.bert_model = AutoModelForMaskedLM.from_pretrained(base_path)
228238
self.bert_model = self.bert_model.eval() # pyright: ignore
229239
self.bert_model = self.bert_model.to(self.configs.device) # pyright: ignore

src/easevoice/module/data_utils.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,14 @@
1-
import time
2-
import logging
31
import os
42
import random
53
import traceback
6-
import numpy as np
74
import torch
85
import torch.utils.data
96
from tqdm import tqdm
107

11-
from module import commons
12-
from module.mel_processing import spectrogram_torch
8+
from . import commons
9+
from .mel_processing import spectrogram_torch
1310
from text import cleaned_text_to_sequence
1411
import torch.nn.functional as F
15-
from functools import lru_cache
16-
import requests
17-
from scipy.io import wavfile
18-
from io import BytesIO
1912
from ...utils.audio import load_audio
2013
version = os.environ.get('version', None)
2114
# ZeroDivisionError fixed by Tybost (https://github.com/RVC-Boss/GPT-SoVITS/issues/79)

src/easevoice/module/mel_processing.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,5 @@
1-
import math
2-
import os
3-
import random
41
import torch
5-
from torch import nn
6-
import torch.nn.functional as F
72
import torch.utils.data
8-
import numpy as np
9-
import librosa
10-
import librosa.util as librosa_util
11-
from librosa.util import normalize, pad_center, tiny
12-
from scipy.signal import get_window
13-
from scipy.io.wavfile import read
143
from librosa.filters import mel as librosa_mel_fn
154

165
MAX_WAV_VALUE = 32768.0

src/easevoice/module/models.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,16 @@
11
import contextlib
22
from torch.cuda.amp import autocast
33
from ..text.symbols import SYMBOLS
4-
from module.quantize import ResidualVectorQuantizer
5-
from module.mrte_model import MRTE
6-
from module.commons import init_weights, get_padding
4+
from .quantize import ResidualVectorQuantizer
5+
from .mrte_model import MRTE
6+
from .commons import init_weights, get_padding
77
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
88
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
9-
from module import attentions
10-
from module import modules
11-
from module import commons
9+
from . import attentions, modules, commons
1210
from torch.nn import functional as F
1311
from torch import nn
1412
import torch
15-
import pdb
16-
import os
1713
import math
18-
import copy
1914
import warnings
2015
warnings.filterwarnings("ignore")
2116

0 commit comments

Comments
 (0)