
Commit b4c6531

Format code (#142)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 376bd31 commit b4c6531

8 files changed: 64 additions and 51 deletions


export_onnx.py

Lines changed: 21 additions & 17 deletions
@@ -2,27 +2,29 @@
 from infer_pack.models_onnx import SynthesizerTrnMs256NSFsidO
 import torch
 
-if __name__ == '__main__':
-    MoeVS = True #模型是否为MoeVoiceStudio(原MoeSS)使用
+if __name__ == "__main__":
+    MoeVS = True  # 模型是否为MoeVoiceStudio(原MoeSS)使用
 
-    ModelPath = "Shiroha/shiroha.pth" #模型路径
-    ExportedPath = "model.onnx" #输出路径
-    hidden_channels = 256 # hidden_channels,为768Vec做准备
-    cpt = torch.load(ModelPath, map_location="cpu")
-    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
+    ModelPath = "Shiroha/shiroha.pth"  # 模型路径
+    ExportedPath = "model.onnx"  # 输出路径
+    hidden_channels = 256  # hidden_channels,为768Vec做准备
+    cpt = torch.load(ModelPath, map_location="cpu")
+    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
     print(*cpt["config"])
 
-    test_phone = torch.rand(1, 200, hidden_channels) # hidden unit
-    test_phone_lengths = torch.tensor([200]).long() # hidden unit 长度(貌似没啥用)
-    test_pitch = torch.randint(size=(1, 200), low=5, high=255) # 基频(单位赫兹)
-    test_pitchf = torch.rand(1, 200) # nsf基频
-    test_ds = torch.LongTensor([0]) # 说话人ID
-    test_rnd = torch.rand(1, 192, 200) # 噪声(加入随机因子)
+    test_phone = torch.rand(1, 200, hidden_channels)  # hidden unit
+    test_phone_lengths = torch.tensor([200]).long()  # hidden unit 长度(貌似没啥用)
+    test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # 基频(单位赫兹)
+    test_pitchf = torch.rand(1, 200)  # nsf基频
+    test_ds = torch.LongTensor([0])  # 说话人ID
+    test_rnd = torch.rand(1, 192, 200)  # 噪声(加入随机因子)
 
-    device = "cpu" #导出时设备(不影响使用模型)
+    device = "cpu"  # 导出时设备(不影响使用模型)
 
     if MoeVS:
-        net_g = SynthesizerTrnMs256NSFsidM(*cpt["config"], is_half=False) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
+        net_g = SynthesizerTrnMs256NSFsidM(
+            *cpt["config"], is_half=False
+        )  # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
         net_g.load_state_dict(cpt["weight"], strict=False)
         input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
         output_names = [
@@ -52,7 +54,9 @@
             output_names=output_names,
         )
     else:
-        net_g = SynthesizerTrnMs256NSFsidO(*cpt["config"], is_half=False) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
+        net_g = SynthesizerTrnMs256NSFsidO(
+            *cpt["config"], is_half=False
+        )  # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
         net_g.load_state_dict(cpt["weight"], strict=False)
         input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds"]
         output_names = [
@@ -78,4 +82,4 @@
             verbose=False,
             input_names=input_names,
             output_names=output_names,
-        )
+        )
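For context, here is a minimal, self-contained sketch of the export pattern this script follows. `TinyStandIn` and the `"audio"` output name are placeholders invented for illustration; the real script exports `SynthesizerTrnMs256NSFsidM` / `SynthesizerTrnMs256NSFsidO`, and its actual `output_names` list is not shown in this diff.

```python
# Sketch only: a stand-in module with the same forward signature, exported the
# same way the script above exports the real synthesizer.
import torch
import torch.nn as nn


class TinyStandIn(nn.Module):  # placeholder model, not part of the repo
    def forward(self, phone, phone_lengths, pitch, pitchf, ds, rnd):
        # Touch every input so the traced ONNX graph keeps all six of them,
        # and return a dummy (1, 200) "waveform".
        scale = phone_lengths.float().mean() + pitch.float().mean() + ds.float().mean()
        return phone.mean(dim=-1) + pitchf + rnd.mean(dim=1) + scale


hidden_channels = 256
test_phone = torch.rand(1, 200, hidden_channels)            # hidden units
test_phone_lengths = torch.tensor([200]).long()             # sequence length
test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # coarse f0
test_pitchf = torch.rand(1, 200)                            # NSF f0
test_ds = torch.LongTensor([0])                             # speaker id
test_rnd = torch.rand(1, 192, 200)                          # noise

torch.onnx.export(
    TinyStandIn(),
    (test_phone, test_phone_lengths, test_pitch, test_pitchf, test_ds, test_rnd),
    "model.onnx",
    input_names=["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"],
    output_names=["audio"],  # assumed name; the real list is elided from this diff
    opset_version=13,
)
```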

extract_f0_print.py

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ def __init__(self, samplerate=16000, hop_size=160):
     def compute_f0(self, path, f0_method):
         # default resample type of librosa.resample is "soxr_hq".
         # Quality: soxr_vhq > soxr_hq
-        x, sr = librosa.load(path, self.fs)#, res_type='soxr_vhq'
+        x, sr = librosa.load(path, self.fs)  # , res_type='soxr_vhq'
         p_len = x.shape[0] // self.hop
         assert sr == self.fs
         if f0_method == "pm":
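A side note on the `librosa.load` call kept above: in librosa 0.10+ the sample rate must be passed as a keyword argument, and the commented-out hint could be restored as a `res_type` argument. A hedged sketch, assuming a local `example.wav` file:

```python
# Sketch only: keyword-style call with the higher-quality resampler enabled.
import librosa

x, sr = librosa.load("example.wav", sr=16000, res_type="soxr_vhq")
assert sr == 16000
p_len = x.shape[0] // 160  # frame count at the 160-sample hop from __init__ above
```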

gui.py

Lines changed: 7 additions & 10 deletions
@@ -67,7 +67,7 @@ def __init__(
             print(e)
 
     def get_f0(self, x, f0_up_key, inp_f0=None):
-        x_pad=1
+        x_pad = 1
         f0_min = 50
         f0_max = 1100
         f0_mel_min = 1127 * np.log(1 + f0_min / 700)
@@ -137,7 +137,7 @@ def infer(self, feats: torch.Tensor) -> np.ndarray:
         feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
         torch.cuda.synchronize()
         print(feats.shape)
-        if(self.if_f0==1):
+        if self.if_f0 == 1:
             pitch, pitchf = self.get_f0(audio, self.f0_up_key)
             p_len = min(feats.shape[1], 13000, pitch.shape[0])  # 太大了爆显存
         else:
@@ -146,7 +146,7 @@ def infer(self, feats: torch.Tensor) -> np.ndarray:
         torch.cuda.synchronize()
         # print(feats.shape,pitch.shape)
         feats = feats[:, :p_len, :]
-        if(self.if_f0==1):
+        if self.if_f0 == 1:
             pitch = pitch[:p_len]
             pitchf = pitchf[:p_len]
             pitch = torch.LongTensor(pitch).unsqueeze(0).to(device)
@@ -155,17 +155,15 @@ def infer(self, feats: torch.Tensor) -> np.ndarray:
         ii = 0  # sid
         sid = torch.LongTensor([ii]).to(device)
         with torch.no_grad():
-            if(self.if_f0==1):
+            if self.if_f0 == 1:
                 infered_audio = (
                     self.net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
                     .data.cpu()
                     .float()
                 )
             else:
-                infered_audio = (
-                    self.net_g.infer(feats, p_len, sid)[0][0, 0]
-                    .data.cpu()
-                    .float()
+                infered_audio = (
+                    self.net_g.infer(feats, p_len, sid)[0][0, 0].data.cpu().float()
                 )
         torch.cuda.synchronize()
         return infered_audio
@@ -387,7 +385,7 @@ def start_vc(self):
             self.config.pth_path,
             self.config.index_path,
             self.config.npy_path,
-            self.config.index_rate
+            self.config.index_rate,
         )
         self.input_wav: np.ndarray = np.zeros(
             self.extra_frame
@@ -511,7 +509,6 @@ def audio_callback(
         total_time = time.perf_counter() - start_time
         self.window["infer_time"].update(int(total_time * 1000))
         print("infer time:" + str(total_time))
-
 
     def get_devices(self, update: bool = True):
         """获取设备列表"""

i18n.py

Lines changed: 4 additions & 2 deletions
@@ -11,8 +11,10 @@ def load_language_list(language):
 
 class I18nAuto:
     def __init__(self, language=None):
-        if language in ['auto', None]:
-            language = locale.getdefaultlocale()[0]#getlocale can't identify the system's language ((None, None))
+        if language in ["auto", None]:
+            language = locale.getdefaultlocale()[
+                0
+            ]  # getlocale can't identify the system's language ((None, None))
         if not os.path.exists(f"./i18n/{language}.json"):
             language = "en_US"
         self.language = language
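As the kept comment notes, `locale.getlocale()` can return `(None, None)`, which is why `getdefaultlocale()` is used here. That function is deprecated from Python 3.11, so a fallback along these lines may eventually be needed; this is a sketch under that assumption, not code from the repo:

```python
# Sketch only: resolve a UI language with an environment fallback.
import locale
import os

language = locale.getdefaultlocale()[0]  # e.g. "en_US"; may be None on some systems
if language is None:
    language = os.environ.get("LANG", "en_US").split(".")[0]
if not os.path.exists(f"./i18n/{language}.json"):
    language = "en_US"
print(language)
```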

infer-web.py

Lines changed: 22 additions & 14 deletions
@@ -119,7 +119,6 @@ def load_hubert():
     uvr5_names.append(name.replace(".pth", ""))
 
 
-
 def vc_single(
     sid,
     input_audio,
@@ -888,23 +887,27 @@ def change_info_(ckpt_path):
 
 from infer_pack.models_onnx_moess import SynthesizerTrnMs256NSFsidM
 from infer_pack.models_onnx import SynthesizerTrnMs256NSFsidO
+
+
 def export_onnx(ModelPath, ExportedPath, MoeVS=True):
-    hidden_channels = 256 # hidden_channels,为768Vec做准备
-    cpt = torch.load(ModelPath, map_location="cpu")
-    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
+    hidden_channels = 256  # hidden_channels,为768Vec做准备
+    cpt = torch.load(ModelPath, map_location="cpu")
+    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
     print(*cpt["config"])
 
-    test_phone = torch.rand(1, 200, hidden_channels) # hidden unit
-    test_phone_lengths = torch.tensor([200]).long() # hidden unit 长度(貌似没啥用)
-    test_pitch = torch.randint(size=(1, 200), low=5, high=255) # 基频(单位赫兹)
-    test_pitchf = torch.rand(1, 200) # nsf基频
-    test_ds = torch.LongTensor([0]) # 说话人ID
-    test_rnd = torch.rand(1, 192, 200) # 噪声(加入随机因子)
+    test_phone = torch.rand(1, 200, hidden_channels)  # hidden unit
+    test_phone_lengths = torch.tensor([200]).long()  # hidden unit 长度(貌似没啥用)
+    test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # 基频(单位赫兹)
+    test_pitchf = torch.rand(1, 200)  # nsf基频
+    test_ds = torch.LongTensor([0])  # 说话人ID
+    test_rnd = torch.rand(1, 192, 200)  # 噪声(加入随机因子)
 
-    device = "cpu" #导出时设备(不影响使用模型)
+    device = "cpu"  # 导出时设备(不影响使用模型)
 
     if MoeVS:
-        net_g = SynthesizerTrnMs256NSFsidM(*cpt["config"], is_half=False) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
+        net_g = SynthesizerTrnMs256NSFsidM(
+            *cpt["config"], is_half=False
+        )  # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
         net_g.load_state_dict(cpt["weight"], strict=False)
         input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
         output_names = [
@@ -934,7 +937,9 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
             output_names=output_names,
         )
     else:
-        net_g = SynthesizerTrnMs256NSFsidO(*cpt["config"], is_half=False) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
+        net_g = SynthesizerTrnMs256NSFsidO(
+            *cpt["config"], is_half=False
+        )  # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
         net_g.load_state_dict(cpt["weight"], strict=False)
         input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds"]
         output_names = [
@@ -963,6 +968,7 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
         )
     return "Finished"
 
+
 with gr.Blocks() as app:
     gr.Markdown(
         value=i18n(
@@ -1443,7 +1449,9 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
         with gr.Row():
             ckpt_dir = gr.Textbox(label=i18n("RVC模型路径"), value="", interactive=True)
         with gr.Row():
-            onnx_dir = gr.Textbox(label=i18n("Onnx输出路径"), value="", interactive=True)
+            onnx_dir = gr.Textbox(
+                label=i18n("Onnx输出路径"), value="", interactive=True
+            )
         with gr.Row():
             moevs = gr.Checkbox(label=i18n("MoeVS模型"), value=True)
             infoOnnx = gr.Label(label="Null")
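For reference, a minimal standalone Gradio sketch of how the ONNX-export widgets above could be wired to `export_onnx`. The button and the `click` binding are assumptions, since that part of the file is not in this diff, and the handler body here is only a placeholder for the real function defined earlier in the file.

```python
# Sketch only: standalone wiring for the export tab shown above.
import gradio as gr


def export_onnx(ModelPath, ExportedPath, MoeVS=True):
    # Placeholder body for illustration; the real implementation lives in infer-web.py.
    return "Finished"


with gr.Blocks() as app:
    with gr.Row():
        ckpt_dir = gr.Textbox(label="RVC模型路径", value="", interactive=True)
    with gr.Row():
        onnx_dir = gr.Textbox(label="Onnx输出路径", value="", interactive=True)
    with gr.Row():
        moevs = gr.Checkbox(label="MoeVS模型", value=True)
        infoOnnx = gr.Label(label="Null")
    butOnnx = gr.Button("Export")  # assumed control, not shown in the diff
    butOnnx.click(export_onnx, [ckpt_dir, onnx_dir, moevs], [infoOnnx])

app.launch()
```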

my_utils.py

Lines changed: 1 addition & 1 deletion
@@ -18,4 +18,4 @@ def load_audio(file, sr):
     except Exception as e:
         raise RuntimeError(f"Failed to load audio: {e}")
 
-    return np.frombuffer(out, np.float32).flatten()
+    return np.frombuffer(out, np.float32).flatten()

train/data_utils.py

Lines changed: 4 additions & 4 deletions
@@ -99,8 +99,8 @@ def get_audio(self, filename):
             )
         )
         audio_norm = audio
-        # audio_norm = audio / self.max_wav_value
-        # audio_norm = audio / np.abs(audio).max()
+        # audio_norm = audio / self.max_wav_value
+        # audio_norm = audio / np.abs(audio).max()
 
         audio_norm = audio_norm.unsqueeze(0)
         spec_filename = filename.replace(".wav", ".spec.pt")
@@ -291,8 +291,8 @@ def get_audio(self, filename):
             )
         )
         audio_norm = audio
-        # audio_norm = audio / self.max_wav_value
-        # audio_norm = audio / np.abs(audio).max()
+        # audio_norm = audio / self.max_wav_value
+        # audio_norm = audio / np.abs(audio).max()
 
         audio_norm = audio_norm.unsqueeze(0)
         spec_filename = filename.replace(".wav", ".spec.pt")
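The two commented-out lines kept above are alternative normalization schemes. A small sketch of what each would do, using assumed values for the constant and the waveform:

```python
# Sketch only: fixed-scale vs. peak normalization of a waveform tensor.
import torch

max_wav_value = 32768.0                 # assumed int16 full-scale value
audio = torch.randn(16000) * 12000      # fake waveform in an int16-like range

audio_fixed = audio / max_wav_value     # divide by a fixed full-scale constant
audio_peak = audio / audio.abs().max()  # scale so the loudest sample is exactly ±1.0
print(audio_fixed.abs().max().item(), audio_peak.abs().max().item())
```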

trainset_preprocess_pipeline_print.py

Lines changed: 4 additions & 2 deletions
@@ -61,7 +61,9 @@ def norm_write(self, tmp_audio, idx0, idx1):
             self.sr,
             tmp_audio.astype(np.float32),
         )
-        tmp_audio = librosa.resample(tmp_audio, orig_sr=self.sr, target_sr=16000)#, res_type="soxr_vhq"
+        tmp_audio = librosa.resample(
+            tmp_audio, orig_sr=self.sr, target_sr=16000
+        )  # , res_type="soxr_vhq"
         wavfile.write(
             "%s/%s_%s.wav" % (self.wavs16k_dir, idx0, idx1),
             16000,
@@ -72,7 +74,7 @@ def pipeline(self, path, idx0):
         try:
             audio = load_audio(path, self.sr)
             # zero phased digital filter cause pre-ringing noise...
-            # audio = signal.filtfilt(self.bh, self.ah, audio)
+            # audio = signal.filtfilt(self.bh, self.ah, audio)
             audio = signal.lfilter(self.bh, self.ah, audio)
 
             idx1 = 0
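The comment retained above is about pre-ringing from zero-phase filtering. A sketch contrasting `filtfilt` with the `lfilter` call that replaced it, followed by the resample to 16 kHz; the filter design and the 40 kHz project sample rate are assumptions, not values shown in this diff:

```python
# Sketch only: causal vs. zero-phase high-pass filtering, then resampling to 16 kHz.
import numpy as np
from scipy import signal
import librosa

sr = 40000                                               # assumed project sample rate
bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=sr)  # assumed filter design
audio = np.random.randn(sr).astype(np.float32)           # one second of noise

zero_phase = signal.filtfilt(bh, ah, audio)  # forward-backward pass: no phase shift, but pre-ringing
causal = signal.lfilter(bh, ah, audio)       # single forward pass, as in the pipeline above

audio_16k = librosa.resample(causal, orig_sr=sr, target_sr=16000, res_type="soxr_vhq")
print(audio_16k.shape)
```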
