
Commit b4c6531

Format code (#142)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 376bd31 commit b4c6531

8 files changed: 64 additions and 51 deletions


export_onnx.py

Lines changed: 21 additions & 17 deletions
@@ -2,27 +2,29 @@
 from infer_pack.models_onnx import SynthesizerTrnMs256NSFsidO
 import torch
 
-if __name__ == '__main__':
-    MoeVS = True #模型是否为MoeVoiceStudio(原MoeSS)使用
+if __name__ == "__main__":
+    MoeVS = True  # 模型是否为MoeVoiceStudio(原MoeSS)使用
 
-    ModelPath = "Shiroha/shiroha.pth" #模型路径
-    ExportedPath = "model.onnx" #输出路径
-    hidden_channels = 256 # hidden_channels,为768Vec做准备
-    cpt = torch.load(ModelPath, map_location="cpu")
-    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
+    ModelPath = "Shiroha/shiroha.pth"  # 模型路径
+    ExportedPath = "model.onnx"  # 输出路径
+    hidden_channels = 256  # hidden_channels,为768Vec做准备
+    cpt = torch.load(ModelPath, map_location="cpu")
+    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
     print(*cpt["config"])
 
-    test_phone = torch.rand(1, 200, hidden_channels) # hidden unit
-    test_phone_lengths = torch.tensor([200]).long() # hidden unit 长度(貌似没啥用)
-    test_pitch = torch.randint(size=(1, 200), low=5, high=255) # 基频(单位赫兹)
-    test_pitchf = torch.rand(1, 200) # nsf基频
-    test_ds = torch.LongTensor([0]) # 说话人ID
-    test_rnd = torch.rand(1, 192, 200) # 噪声(加入随机因子)
+    test_phone = torch.rand(1, 200, hidden_channels)  # hidden unit
+    test_phone_lengths = torch.tensor([200]).long()  # hidden unit 长度(貌似没啥用)
+    test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # 基频(单位赫兹)
+    test_pitchf = torch.rand(1, 200)  # nsf基频
+    test_ds = torch.LongTensor([0])  # 说话人ID
+    test_rnd = torch.rand(1, 192, 200)  # 噪声(加入随机因子)
 
-    device = "cpu" #导出时设备(不影响使用模型)
+    device = "cpu"  # 导出时设备(不影响使用模型)
 
     if MoeVS:
-        net_g = SynthesizerTrnMs256NSFsidM(*cpt["config"], is_half=False) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
+        net_g = SynthesizerTrnMs256NSFsidM(
+            *cpt["config"], is_half=False
+        )  # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
         net_g.load_state_dict(cpt["weight"], strict=False)
         input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
         output_names = [
@@ -52,7 +54,9 @@
             output_names=output_names,
         )
     else:
-        net_g = SynthesizerTrnMs256NSFsidO(*cpt["config"], is_half=False) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
+        net_g = SynthesizerTrnMs256NSFsidO(
+            *cpt["config"], is_half=False
+        )  # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
         net_g.load_state_dict(cpt["weight"], strict=False)
         input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds"]
         output_names = [
@@ -78,4 +82,4 @@
             verbose=False,
             input_names=input_names,
             output_names=output_names,
-        )
+        )
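For context, here is a minimal, self-contained sketch of the export pattern this script follows. `TinyStandIn` and the `"audio"` output name are placeholders invented for illustration; the real script exports `SynthesizerTrnMs256NSFsidM` / `SynthesizerTrnMs256NSFsidO`, and its actual `output_names` list is not shown in this diff.

```python
# Sketch only: a stand-in module with the same forward signature, exported the
# same way the script above exports the real synthesizer.
import torch
import torch.nn as nn


class TinyStandIn(nn.Module):  # placeholder model, not part of the repo
    def forward(self, phone, phone_lengths, pitch, pitchf, ds, rnd):
        # Touch every input so the traced ONNX graph keeps all six of them,
        # and return a dummy (1, 200) "waveform".
        scale = phone_lengths.float().mean() + pitch.float().mean() + ds.float().mean()
        return phone.mean(dim=-1) + pitchf + rnd.mean(dim=1) + scale


hidden_channels = 256
test_phone = torch.rand(1, 200, hidden_channels)            # hidden units
test_phone_lengths = torch.tensor([200]).long()             # sequence length
test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # coarse f0
test_pitchf = torch.rand(1, 200)                            # NSF f0
test_ds = torch.LongTensor([0])                             # speaker id
test_rnd = torch.rand(1, 192, 200)                          # noise

torch.onnx.export(
    TinyStandIn(),
    (test_phone, test_phone_lengths, test_pitch, test_pitchf, test_ds, test_rnd),
    "model.onnx",
    input_names=["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"],
    output_names=["audio"],  # assumed name; the real list is elided from this diff
    opset_version=13,
)
```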

extract_f0_print.py

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ def __init__(self, samplerate=16000, hop_size=160):
     def compute_f0(self, path, f0_method):
         # default resample type of librosa.resample is "soxr_hq".
         # Quality: soxr_vhq > soxr_hq
-        x, sr = librosa.load(path, self.fs)#, res_type='soxr_vhq'
+        x, sr = librosa.load(path, self.fs)  # , res_type='soxr_vhq'
         p_len = x.shape[0] // self.hop
         assert sr == self.fs
         if f0_method == "pm":
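A side note on the `librosa.load` call kept above: in librosa 0.10+ the sample rate must be passed as a keyword argument, and the commented-out hint could be restored as a `res_type` argument. A hedged sketch, assuming a local `example.wav` file:

```python
# Sketch only: keyword-style call with the higher-quality resampler enabled.
import librosa

x, sr = librosa.load("example.wav", sr=16000, res_type="soxr_vhq")
assert sr == 16000
p_len = x.shape[0] // 160  # frame count at the 160-sample hop from __init__ above
```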

gui.py

Lines changed: 7 additions & 10 deletions
@@ -67,7 +67,7 @@ def __init__(
             print(e)
 
     def get_f0(self, x, f0_up_key, inp_f0=None):
-        x_pad=1
+        x_pad = 1
         f0_min = 50
         f0_max = 1100
         f0_mel_min = 1127 * np.log(1 + f0_min / 700)
@@ -137,7 +137,7 @@ def infer(self, feats: torch.Tensor) -> np.ndarray:
         feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
         torch.cuda.synchronize()
         print(feats.shape)
-        if(self.if_f0==1):
+        if self.if_f0 == 1:
             pitch, pitchf = self.get_f0(audio, self.f0_up_key)
             p_len = min(feats.shape[1], 13000, pitch.shape[0])  # 太大了爆显存
         else:
@@ -146,7 +146,7 @@ def infer(self, feats: torch.Tensor) -> np.ndarray:
         torch.cuda.synchronize()
         # print(feats.shape,pitch.shape)
         feats = feats[:, :p_len, :]
-        if(self.if_f0==1):
+        if self.if_f0 == 1:
             pitch = pitch[:p_len]
             pitchf = pitchf[:p_len]
             pitch = torch.LongTensor(pitch).unsqueeze(0).to(device)
@@ -155,17 +155,15 @@ def infer(self, feats: torch.Tensor) -> np.ndarray:
         ii = 0  # sid
         sid = torch.LongTensor([ii]).to(device)
         with torch.no_grad():
-            if(self.if_f0==1):
+            if self.if_f0 == 1:
                 infered_audio = (
                     self.net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
                     .data.cpu()
                     .float()
                 )
             else:
-                infered_audio = (
-                    self.net_g.infer(feats, p_len, sid)[0][0, 0]
-                    .data.cpu()
-                    .float()
+                infered_audio = (
+                    self.net_g.infer(feats, p_len, sid)[0][0, 0].data.cpu().float()
                 )
         torch.cuda.synchronize()
         return infered_audio
@@ -387,7 +385,7 @@ def start_vc(self):
             self.config.pth_path,
             self.config.index_path,
             self.config.npy_path,
-            self.config.index_rate
+            self.config.index_rate,
         )
         self.input_wav: np.ndarray = np.zeros(
             self.extra_frame
@@ -511,7 +509,6 @@ def audio_callback(
         total_time = time.perf_counter() - start_time
         self.window["infer_time"].update(int(total_time * 1000))
         print("infer time:" + str(total_time))
-
 
     def get_devices(self, update: bool = True):
         """获取设备列表"""

i18n.py

Lines changed: 4 additions & 2 deletions
@@ -11,8 +11,10 @@ def load_language_list(language):
 
 class I18nAuto:
     def __init__(self, language=None):
-        if language in ['auto', None]:
-            language = locale.getdefaultlocale()[0]#getlocale can't identify the system's language ((None, None))
+        if language in ["auto", None]:
+            language = locale.getdefaultlocale()[
+                0
+            ]  # getlocale can't identify the system's language ((None, None))
         if not os.path.exists(f"./i18n/{language}.json"):
             language = "en_US"
         self.language = language
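As the kept comment notes, `locale.getlocale()` can return `(None, None)`, which is why `getdefaultlocale()` is used here. That function is deprecated from Python 3.11, so a fallback along these lines may eventually be needed; this is a sketch under that assumption, not code from the repo:

```python
# Sketch only: resolve a UI language with an environment fallback.
import locale
import os

language = locale.getdefaultlocale()[0]  # e.g. "en_US"; may be None on some systems
if language is None:
    language = os.environ.get("LANG", "en_US").split(".")[0]
if not os.path.exists(f"./i18n/{language}.json"):
    language = "en_US"
print(language)
```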

infer-web.py

Lines changed: 22 additions & 14 deletions
@@ -119,7 +119,6 @@ def load_hubert():
     uvr5_names.append(name.replace(".pth", ""))
 
 
-
 def vc_single(
     sid,
     input_audio,
@@ -888,23 +887,27 @@ def change_info_(ckpt_path):
 
 from infer_pack.models_onnx_moess import SynthesizerTrnMs256NSFsidM
 from infer_pack.models_onnx import SynthesizerTrnMs256NSFsidO
+
+
 def export_onnx(ModelPath, ExportedPath, MoeVS=True):
-    hidden_channels = 256 # hidden_channels,为768Vec做准备
-    cpt = torch.load(ModelPath, map_location="cpu")
-    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
+    hidden_channels = 256  # hidden_channels,为768Vec做准备
+    cpt = torch.load(ModelPath, map_location="cpu")
+    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
     print(*cpt["config"])
 
-    test_phone = torch.rand(1, 200, hidden_channels) # hidden unit
-    test_phone_lengths = torch.tensor([200]).long() # hidden unit 长度(貌似没啥用)
-    test_pitch = torch.randint(size=(1, 200), low=5, high=255) # 基频(单位赫兹)
-    test_pitchf = torch.rand(1, 200) # nsf基频
-    test_ds = torch.LongTensor([0]) # 说话人ID
-    test_rnd = torch.rand(1, 192, 200) # 噪声(加入随机因子)
+    test_phone = torch.rand(1, 200, hidden_channels)  # hidden unit
+    test_phone_lengths = torch.tensor([200]).long()  # hidden unit 长度(貌似没啥用)
+    test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # 基频(单位赫兹)
+    test_pitchf = torch.rand(1, 200)  # nsf基频
+    test_ds = torch.LongTensor([0])  # 说话人ID
+    test_rnd = torch.rand(1, 192, 200)  # 噪声(加入随机因子)
 
-    device = "cpu" #导出时设备(不影响使用模型)
+    device = "cpu"  # 导出时设备(不影响使用模型)
 
     if MoeVS:
-        net_g = SynthesizerTrnMs256NSFsidM(*cpt["config"], is_half=False) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
+        net_g = SynthesizerTrnMs256NSFsidM(
+            *cpt["config"], is_half=False
+        )  # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
         net_g.load_state_dict(cpt["weight"], strict=False)
         input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
         output_names = [
@@ -934,7 +937,9 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
             output_names=output_names,
         )
     else:
-        net_g = SynthesizerTrnMs256NSFsidO(*cpt["config"], is_half=False) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
+        net_g = SynthesizerTrnMs256NSFsidO(
+            *cpt["config"], is_half=False
+        )  # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
         net_g.load_state_dict(cpt["weight"], strict=False)
         input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds"]
         output_names = [
@@ -963,6 +968,7 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
         )
     return "Finished"
 
+
 with gr.Blocks() as app:
     gr.Markdown(
         value=i18n(
@@ -1443,7 +1449,9 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
         with gr.Row():
             ckpt_dir = gr.Textbox(label=i18n("RVC模型路径"), value="", interactive=True)
         with gr.Row():
-            onnx_dir = gr.Textbox(label=i18n("Onnx输出路径"), value="", interactive=True)
+            onnx_dir = gr.Textbox(
+                label=i18n("Onnx输出路径"), value="", interactive=True
+            )
         with gr.Row():
             moevs = gr.Checkbox(label=i18n("MoeVS模型"), value=True)
             infoOnnx = gr.Label(label="Null")
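For reference, a minimal standalone Gradio sketch of how the ONNX-export widgets above could be wired to `export_onnx`. The button and the `click` binding are assumptions, since that part of the file is not in this diff, and the handler body here is only a placeholder for the real function defined earlier in the file.

```python
# Sketch only: standalone wiring for the export tab shown above.
import gradio as gr


def export_onnx(ModelPath, ExportedPath, MoeVS=True):
    # Placeholder body for illustration; the real implementation lives in infer-web.py.
    return "Finished"


with gr.Blocks() as app:
    with gr.Row():
        ckpt_dir = gr.Textbox(label="RVC模型路径", value="", interactive=True)
    with gr.Row():
        onnx_dir = gr.Textbox(label="Onnx输出路径", value="", interactive=True)
    with gr.Row():
        moevs = gr.Checkbox(label="MoeVS模型", value=True)
        infoOnnx = gr.Label(label="Null")
    butOnnx = gr.Button("Export")  # assumed control, not shown in the diff
    butOnnx.click(export_onnx, [ckpt_dir, onnx_dir, moevs], [infoOnnx])

app.launch()
```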

my_utils.py

Lines changed: 1 addition & 1 deletion
@@ -18,4 +18,4 @@ def load_audio(file, sr):
     except Exception as e:
         raise RuntimeError(f"Failed to load audio: {e}")
 
-    return np.frombuffer(out, np.float32).flatten()
+    return np.frombuffer(out, np.float32).flatten()

train/data_utils.py

Lines changed: 4 additions & 4 deletions
@@ -99,8 +99,8 @@ def get_audio(self, filename):
             )
         )
         audio_norm = audio
-        # audio_norm = audio / self.max_wav_value
-        # audio_norm = audio / np.abs(audio).max()
+        # audio_norm = audio / self.max_wav_value
+        # audio_norm = audio / np.abs(audio).max()
 
         audio_norm = audio_norm.unsqueeze(0)
         spec_filename = filename.replace(".wav", ".spec.pt")
@@ -291,8 +291,8 @@ def get_audio(self, filename):
             )
         )
         audio_norm = audio
-        # audio_norm = audio / self.max_wav_value
-        # audio_norm = audio / np.abs(audio).max()
+        # audio_norm = audio / self.max_wav_value
+        # audio_norm = audio / np.abs(audio).max()
 
         audio_norm = audio_norm.unsqueeze(0)
         spec_filename = filename.replace(".wav", ".spec.pt")
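The two commented-out lines kept above are alternative normalization schemes. A small sketch of what each would do, using assumed values for the constant and the waveform:

```python
# Sketch only: fixed-scale vs. peak normalization of a waveform tensor.
import torch

max_wav_value = 32768.0                 # assumed int16 full-scale value
audio = torch.randn(16000) * 12000      # fake waveform in an int16-like range

audio_fixed = audio / max_wav_value     # divide by a fixed full-scale constant
audio_peak = audio / audio.abs().max()  # scale so the loudest sample is exactly ±1.0
print(audio_fixed.abs().max().item(), audio_peak.abs().max().item())
```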

trainset_preprocess_pipeline_print.py

Lines changed: 4 additions & 2 deletions
@@ -61,7 +61,9 @@ def norm_write(self, tmp_audio, idx0, idx1):
             self.sr,
             tmp_audio.astype(np.float32),
         )
-        tmp_audio = librosa.resample(tmp_audio, orig_sr=self.sr, target_sr=16000)#, res_type="soxr_vhq"
+        tmp_audio = librosa.resample(
+            tmp_audio, orig_sr=self.sr, target_sr=16000
+        )  # , res_type="soxr_vhq"
         wavfile.write(
             "%s/%s_%s.wav" % (self.wavs16k_dir, idx0, idx1),
             16000,
@@ -72,7 +74,7 @@ def pipeline(self, path, idx0):
         try:
             audio = load_audio(path, self.sr)
             # zero phased digital filter cause pre-ringing noise...
-            # audio = signal.filtfilt(self.bh, self.ah, audio)
+            # audio = signal.filtfilt(self.bh, self.ah, audio)
             audio = signal.lfilter(self.bh, self.ah, audio)
 
             idx1 = 0
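The comment retained above is about pre-ringing from zero-phase filtering. A sketch contrasting `filtfilt` with the `lfilter` call that replaced it, followed by the resample to 16 kHz; the filter design and the 40 kHz project sample rate are assumptions, not values shown in this diff:

```python
# Sketch only: causal vs. zero-phase high-pass filtering, then resampling to 16 kHz.
import numpy as np
from scipy import signal
import librosa

sr = 40000                                               # assumed project sample rate
bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=sr)  # assumed filter design
audio = np.random.randn(sr).astype(np.float32)           # one second of noise

zero_phase = signal.filtfilt(bh, ah, audio)  # forward-backward pass: no phase shift, but pre-ringing
causal = signal.lfilter(bh, ah, audio)       # single forward pass, as in the pipeline above

audio_16k = librosa.resample(causal, orig_sr=sr, target_sr=16000, res_type="soxr_vhq")
print(audio_16k.shape)
```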
