
Commit a6456f6

Format code (#1193)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 72a18e6 commit a6456f6

15 files changed (+566 −241 lines)

configs/config.py

Lines changed: 4 additions & 1 deletion
@@ -5,10 +5,13 @@
 from multiprocessing import cpu_count
 
 import torch
+
 try:
-    import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import
+    import intel_extension_for_pytorch as ipex  # pylint: disable=import-error, unused-import
+
     if torch.xpu.is_available():
         from infer.modules.ipex import ipex_init
+
         ipex_init()
 except Exception:
     pass
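The guarded import above is the usual optional-dependency pattern: try to load Intel's IPEX extension and initialize XPU support, and fall back silently to stock PyTorch when it is absent or broken. A minimal standalone sketch of the same pattern (the device-selection line is illustrative, not part of this commit):

import torch

try:
    # Optional accelerator support; expected to fail on machines without Intel GPUs.
    import intel_extension_for_pytorch as ipex  # noqa: F401
    xpu_ok = torch.xpu.is_available()
except Exception:
    xpu_ok = False

device = "xpu" if xpu_ok else ("cuda" if torch.cuda.is_available() else "cpu")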

gui_v1.py

Lines changed: 107 additions & 43 deletions
@@ -478,15 +478,28 @@ def start_vc(self):
             inp_q,
             opt_q,
             device,
-            self.rvc if hasattr(self, "rvc") else None
+            self.rvc if hasattr(self, "rvc") else None,
         )
         self.config.samplerate = self.rvc.tgt_sr
         self.zc = self.rvc.tgt_sr // 100
-        self.block_frame = int(np.round(self.config.block_time * self.config.samplerate / self.zc)) * self.zc
+        self.block_frame = (
+            int(np.round(self.config.block_time * self.config.samplerate / self.zc))
+            * self.zc
+        )
         self.block_frame_16k = 160 * self.block_frame // self.zc
-        self.crossfade_frame = int(np.round(self.config.crossfade_time * self.config.samplerate / self.zc)) * self.zc
+        self.crossfade_frame = (
+            int(
+                np.round(
+                    self.config.crossfade_time * self.config.samplerate / self.zc
+                )
+            )
+            * self.zc
+        )
         self.sola_search_frame = self.zc
-        self.extra_frame = int(np.round(self.config.extra_time * self.config.samplerate / self.zc)) * self.zc
+        self.extra_frame = (
+            int(np.round(self.config.extra_time * self.config.samplerate / self.zc))
+            * self.zc
+        )
         self.input_wav: torch.Tensor = torch.zeros(
             self.extra_frame
             + self.crossfade_frame
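All three reformatted expressions share one recipe: convert a duration in seconds to samples, then round to a whole number of zc-sample chunks, where zc = tgt_sr // 100 is 10 ms of audio. A worked sketch with illustrative values (not taken from the commit):

import numpy as np

samplerate = 40000           # illustrative target sample rate
zc = samplerate // 100       # 400 samples = 10 ms
block_time = 0.25            # illustrative block length in seconds

block_frame = int(np.round(block_time * samplerate / zc)) * zc
print(block_frame)               # 10000 samples, an exact multiple of zc
print(160 * block_frame // zc)   # 4000 samples, the same block at 16 kHz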
@@ -495,7 +508,11 @@ def start_vc(self):
             device=device,
             dtype=torch.float32,
         )
-        self.input_wav_res: torch.Tensor= torch.zeros(160 * self.input_wav.shape[0] // self.zc, device=device,dtype=torch.float32)
+        self.input_wav_res: torch.Tensor = torch.zeros(
+            160 * self.input_wav.shape[0] // self.zc,
+            device=device,
+            dtype=torch.float32,
+        )
         self.pitch: np.ndarray = np.zeros(
             self.input_wav.shape[0] // self.zc,
             dtype="int32",
@@ -509,7 +526,9 @@ def start_vc(self):
         )
         self.nr_buffer: torch.Tensor = self.sola_buffer.clone()
         self.output_buffer: torch.Tensor = self.input_wav.clone()
-        self.res_buffer: torch.Tensor = torch.zeros(2 * self.zc, device=device,dtype=torch.float32)
+        self.res_buffer: torch.Tensor = torch.zeros(
+            2 * self.zc, device=device, dtype=torch.float32
+        )
         self.valid_rate = 1 - (self.extra_frame - 1) / self.input_wav.shape[0]
         self.fade_in_window: torch.Tensor = (
             torch.sin(
@@ -529,7 +548,9 @@ def start_vc(self):
         self.resampler = tat.Resample(
             orig_freq=self.config.samplerate, new_freq=16000, dtype=torch.float32
         ).to(device)
-        self.tg = TorchGate(sr=self.config.samplerate, n_fft=4*self.zc, prop_decrease=0.9).to(device)
+        self.tg = TorchGate(
+            sr=self.config.samplerate, n_fft=4 * self.zc, prop_decrease=0.9
+        ).to(device)
         thread_vc = threading.Thread(target=self.soundinput)
         thread_vc.start()
 
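The TorchGate instance built here is the spectral noise gate applied later in the callback; it takes the chunk to denoise plus a longer reference signal for the noise estimate. A minimal sketch of the call shape, assuming the tools.torchgate module bundled with this repository and made-up tensors:

import torch
from tools.torchgate import TorchGate  # assumed import path for the bundled module

sr, zc = 40000, 400                     # illustrative sample rate, 10 ms chunk
tg = TorchGate(sr=sr, n_fft=4 * zc, prop_decrease=0.9)

noisy = torch.randn(1, sr)              # (batch, samples) fake audio to clean
reference = torch.randn(1, 4 * sr)      # longer context for the noise profile
denoised = tg(noisy, reference)         # same call shape as in the audio callback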

@@ -560,36 +581,52 @@ def audio_callback(
         indata = librosa.to_mono(indata.T)
         if self.config.threhold > -60:
             rms = librosa.feature.rms(
-                y=indata, frame_length=4*self.zc, hop_length=self.zc
+                y=indata, frame_length=4 * self.zc, hop_length=self.zc
             )
             db_threhold = (
                 librosa.amplitude_to_db(rms, ref=1.0)[0] < self.config.threhold
             )
             for i in range(db_threhold.shape[0]):
                 if db_threhold[i]:
                     indata[i * self.zc : (i + 1) * self.zc] = 0
-        self.input_wav[: -self.block_frame] = self.input_wav[self.block_frame :].clone()
-        self.input_wav[-self.block_frame: ] = torch.from_numpy(indata).to(device)
-        self.input_wav_res[ : -self.block_frame_16k] = self.input_wav_res[self.block_frame_16k :].clone()
+        self.input_wav[: -self.block_frame] = self.input_wav[
+            self.block_frame :
+        ].clone()
+        self.input_wav[-self.block_frame :] = torch.from_numpy(indata).to(device)
+        self.input_wav_res[: -self.block_frame_16k] = self.input_wav_res[
+            self.block_frame_16k :
+        ].clone()
         # input noise reduction and resampling
         if self.config.I_noise_reduce:
-            input_wav = self.input_wav[-self.crossfade_frame -self.block_frame-2*self.zc: ]
-            input_wav = self.tg(input_wav.unsqueeze(0), self.input_wav.unsqueeze(0))[0, 2*self.zc:]
+            input_wav = self.input_wav[
+                -self.crossfade_frame - self.block_frame - 2 * self.zc :
+            ]
+            input_wav = self.tg(
+                input_wav.unsqueeze(0), self.input_wav.unsqueeze(0)
+            )[0, 2 * self.zc :]
             input_wav[: self.crossfade_frame] *= self.fade_in_window
-            input_wav[: self.crossfade_frame] += self.nr_buffer * self.fade_out_window
-            self.nr_buffer[:] = input_wav[-self.crossfade_frame: ]
-            input_wav = torch.cat((self.res_buffer[:], input_wav[: self.block_frame]))
-            self.res_buffer[:] = input_wav[-2*self.zc: ]
-            self.input_wav_res[-self.block_frame_16k-160: ] = self.resampler(input_wav)[160: ]
+            input_wav[: self.crossfade_frame] += (
+                self.nr_buffer * self.fade_out_window
+            )
+            self.nr_buffer[:] = input_wav[-self.crossfade_frame :]
+            input_wav = torch.cat(
+                (self.res_buffer[:], input_wav[: self.block_frame])
+            )
+            self.res_buffer[:] = input_wav[-2 * self.zc :]
+            self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler(
+                input_wav
+            )[160:]
         else:
-            self.input_wav_res[-self.block_frame_16k-160: ] = self.resampler(self.input_wav[-self.block_frame-2*self.zc: ])[160: ]
+            self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler(
+                self.input_wav[-self.block_frame - 2 * self.zc :]
+            )[160:]
         # infer
         f0_extractor_frame = self.block_frame_16k + 800
-        if self.config.f0method == 'rmvpe':
+        if self.config.f0method == "rmvpe":
             f0_extractor_frame = 5120 * ((f0_extractor_frame - 1) // 5120 + 1)
         infer_wav = self.rvc.infer(
             self.input_wav_res,
-            self.input_wav_res[-f0_extractor_frame :].cpu().numpy(),
+            self.input_wav_res[-f0_extractor_frame:].cpu().numpy(),
             self.block_frame_16k,
             self.valid_rate,
             self.pitch,
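When f0method is rmvpe, the pitch-analysis window is rounded up to the next multiple of 5120 samples of 16 kHz audio, matching RMVPE's fixed chunking. A worked sketch with an illustrative block size:

block_frame_16k = 4000        # illustrative: a 0.25 s block at 16 kHz
f0_extractor_frame = block_frame_16k + 800

# Round up to the next multiple of 5120 (ceiling division, as in the diff):
f0_extractor_frame = 5120 * ((f0_extractor_frame - 1) // 5120 + 1)
print(f0_extractor_frame)     # 5120, since 4800 rounds up to one full chunk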
@@ -601,48 +638,77 @@ def audio_callback(
         ]
         # output noise reduction
         if self.config.O_noise_reduce:
-            self.output_buffer[: -self.block_frame] = self.output_buffer[self.block_frame :].clone()
-            self.output_buffer[-self.block_frame: ] = infer_wav[-self.block_frame:]
-            infer_wav = self.tg(infer_wav.unsqueeze(0), self.output_buffer.unsqueeze(0)).squeeze(0)
+            self.output_buffer[: -self.block_frame] = self.output_buffer[
+                self.block_frame :
+            ].clone()
+            self.output_buffer[-self.block_frame :] = infer_wav[-self.block_frame :]
+            infer_wav = self.tg(
+                infer_wav.unsqueeze(0), self.output_buffer.unsqueeze(0)
+            ).squeeze(0)
         # volume envelop mixing
         if self.config.rms_mix_rate < 1:
             rms1 = librosa.feature.rms(
-                y=self.input_wav_res[-160*infer_wav.shape[0]//self.zc :].cpu().numpy(),
-                frame_length=640,
-                hop_length=160,
+                y=self.input_wav_res[-160 * infer_wav.shape[0] // self.zc :]
+                .cpu()
+                .numpy(),
+                frame_length=640,
+                hop_length=160,
             )
             rms1 = torch.from_numpy(rms1).to(device)
             rms1 = F.interpolate(
-                rms1.unsqueeze(0), size=infer_wav.shape[0] + 1, mode="linear",align_corners=True,
-            )[0,0,:-1]
+                rms1.unsqueeze(0),
+                size=infer_wav.shape[0] + 1,
+                mode="linear",
+                align_corners=True,
+            )[0, 0, :-1]
             rms2 = librosa.feature.rms(
-                y=infer_wav[:].cpu().numpy(), frame_length=4*self.zc, hop_length=self.zc
+                y=infer_wav[:].cpu().numpy(),
+                frame_length=4 * self.zc,
+                hop_length=self.zc,
             )
             rms2 = torch.from_numpy(rms2).to(device)
             rms2 = F.interpolate(
-                rms2.unsqueeze(0), size=infer_wav.shape[0] + 1, mode="linear",align_corners=True,
-            )[0,0,:-1]
+                rms2.unsqueeze(0),
+                size=infer_wav.shape[0] + 1,
+                mode="linear",
+                align_corners=True,
+            )[0, 0, :-1]
             rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-3)
-            infer_wav *= torch.pow(rms1 / rms2, torch.tensor(1 - self.config.rms_mix_rate))
+            infer_wav *= torch.pow(
+                rms1 / rms2, torch.tensor(1 - self.config.rms_mix_rate)
+            )
         # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC
-        conv_input = infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame]
+        conv_input = infer_wav[
+            None, None, : self.crossfade_frame + self.sola_search_frame
+        ]
         cor_nom = F.conv1d(conv_input, self.sola_buffer[None, None, :])
         cor_den = torch.sqrt(
-            F.conv1d(conv_input ** 2, torch.ones(1, 1, self.crossfade_frame, device=device)) + 1e-8)
+            F.conv1d(
+                conv_input**2,
+                torch.ones(1, 1, self.crossfade_frame, device=device),
+            )
+            + 1e-8
+        )
         if sys.platform == "darwin":
             _, sola_offset = torch.max(cor_nom[0, 0] / cor_den[0, 0])
             sola_offset = sola_offset.item()
         else:
             sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])
         logger.debug("sola_offset = %d", int(sola_offset))
-        infer_wav = infer_wav[sola_offset: sola_offset + self.block_frame + self.crossfade_frame]
+        infer_wav = infer_wav[
+            sola_offset : sola_offset + self.block_frame + self.crossfade_frame
+        ]
         infer_wav[: self.crossfade_frame] *= self.fade_in_window
-        infer_wav[: self.crossfade_frame] += self.sola_buffer *self.fade_out_window
-        self.sola_buffer[:] = infer_wav[-self.crossfade_frame:]
+        infer_wav[: self.crossfade_frame] += self.sola_buffer * self.fade_out_window
+        self.sola_buffer[:] = infer_wav[-self.crossfade_frame :]
         if sys.platform == "darwin":
-            outdata[:] = infer_wav[:-self.crossfade_frame].cpu().numpy()[:, np.newaxis]
+            outdata[:] = (
+                infer_wav[: -self.crossfade_frame].cpu().numpy()[:, np.newaxis]
+            )
         else:
-            outdata[:] = infer_wav[:-self.crossfade_frame].repeat(2, 1).t().cpu().numpy()
+            outdata[:] = (
+                infer_wav[: -self.crossfade_frame].repeat(2, 1).t().cpu().numpy()
+            )
         total_time = time.perf_counter() - start_time
         self.window["infer_time"].update(int(total_time * 1000))
         logger.info("Infer time: %.2f", total_time)
@@ -698,9 +764,7 @@ def set_devices(self, input_device, output_device):
         sd.default.device[1] = output_device_indices[
             output_devices.index(output_device)
         ]
-        logger.info(
-            "Input device: %s:%s", str(sd.default.device[0]), input_device
-        )
+        logger.info("Input device: %s:%s", str(sd.default.device[0]), input_device)
         logger.info(
             "Output device: %s:%s", str(sd.default.device[1]), output_device
         )

infer-web.py

Lines changed: 1 addition & 1 deletion
@@ -1028,7 +1028,7 @@ def change_f0_method(f0method8):
                     fn=vc.get_vc,
                     inputs=[sid0, protect0, protect1],
                     outputs=[spk_item, protect0, protect1, file_index2, file_index4],
-                    api_name="infer_change_voice"
+                    api_name="infer_change_voice",
                 )
             with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")):
                 with gr.Group():
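The only change here is a trailing comma after api_name. For context, api_name is what exposes this click handler as a named Gradio endpoint; a hedged sketch of calling it with gradio_client (the URL and argument values are illustrative, not from the commit):

from gradio_client import Client

client = Client("http://127.0.0.1:7865")   # illustrative local WebUI address
result = client.predict(
    "my_model.pth",   # sid0: illustrative model name
    0.33,             # protect0
    0.33,             # protect1
    api_name="/infer_change_voice",
)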

infer/lib/audio.py

Lines changed: 23 additions & 12 deletions
@@ -3,38 +3,49 @@
 import av
 from io import BytesIO
 
+
 def wav2(i, o, format):
-    inp = av.open(i, 'rb')
-    if format == "m4a": format = "mp4"
-    out = av.open(o, 'wb', format=format)
-    if format == "ogg": format = "libvorbis"
-    if format == "mp4": format = "aac"
+    inp = av.open(i, "rb")
+    if format == "m4a":
+        format = "mp4"
+    out = av.open(o, "wb", format=format)
+    if format == "ogg":
+        format = "libvorbis"
+    if format == "mp4":
+        format = "aac"
 
     ostream = out.add_stream(format)
 
     for frame in inp.decode(audio=0):
-        for p in ostream.encode(frame): out.mux(p)
+        for p in ostream.encode(frame):
+            out.mux(p)
 
-    for p in ostream.encode(None): out.mux(p)
+    for p in ostream.encode(None):
+        out.mux(p)
 
     out.close()
     inp.close()
 
+
 def audio2(i, o, format, sr):
-    inp = av.open(i, 'rb')
-    out = av.open(o, 'wb', format=format)
-    if format == "ogg": format = "libvorbis"
-    if format == "f32le": format = "pcm_f32le"
+    inp = av.open(i, "rb")
+    out = av.open(o, "wb", format=format)
+    if format == "ogg":
+        format = "libvorbis"
+    if format == "f32le":
+        format = "pcm_f32le"
 
     ostream = out.add_stream(format, channels=1)
     ostream.sample_rate = sr
 
     for frame in inp.decode(audio=0):
-        for p in ostream.encode(frame): out.mux(p)
+        for p in ostream.encode(frame):
+            out.mux(p)
 
     out.close()
     inp.close()
 
+
 def load_audio(file, sr):
     try:
         file = (
infer/lib/infer_pack/models.py

Lines changed: 4 additions & 1 deletion
@@ -15,6 +15,7 @@
 
 has_xpu = bool(hasattr(torch, "xpu") and torch.xpu.is_available())
 
+
 class TextEncoder256(nn.Module):
     def __init__(
         self,
@@ -1158,7 +1159,9 @@ def forward(self, x):
         if t % self.period != 0:  # pad first
             n_pad = self.period - (t % self.period)
             if has_xpu and x.dtype == torch.bfloat16:
-                x = F.pad(x.to(dtype=torch.float16), (0, n_pad), "reflect").to(dtype=torch.bfloat16)
+                x = F.pad(x.to(dtype=torch.float16), (0, n_pad), "reflect").to(
+                    dtype=torch.bfloat16
+                )
             else:
                 x = F.pad(x, (0, n_pad), "reflect")
             t = t + n_pad
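The second hunk reformats a workaround: reflect padding apparently lacks a bfloat16 kernel on XPU devices, so the tensor is padded in float16 and cast back. A standalone sketch of the pattern (shapes and values are made up):

import torch
import torch.nn.functional as F

has_xpu = bool(hasattr(torch, "xpu") and torch.xpu.is_available())

x = torch.randn(1, 1, 37)    # fake activation; a length that needs padding
n_pad = 3

if has_xpu and x.dtype == torch.bfloat16:
    # Pad in float16, then restore bfloat16, since reflect padding
    # is assumed unsupported for bfloat16 on this backend.
    x = F.pad(x.to(dtype=torch.float16), (0, n_pad), "reflect").to(torch.bfloat16)
else:
    x = F.pad(x, (0, n_pad), "reflect")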

infer/lib/rmvpe.py

Lines changed: 5 additions & 2 deletions
@@ -2,11 +2,14 @@
 
 import numpy as np
 import torch
+
 try:
-    #Fix "Torch not compiled with CUDA enabled"
-    import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import
+    # Fix "Torch not compiled with CUDA enabled"
+    import intel_extension_for_pytorch as ipex  # pylint: disable=import-error, unused-import
+
     if torch.xpu.is_available():
         from infer.modules.ipex import ipex_init
+
         ipex_init()
 except Exception:
     pass
