Skip to content

Commit ef9db1f

Browse files
committed
fix(rt): replace with new f0
1 parent 51c85fc commit ef9db1f

File tree

4 files changed

+23
-117
lines changed

4 files changed

+23
-117
lines changed

infer/lib/rtrvc.py

Lines changed: 14 additions & 106 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
from io import BytesIO
22
import os
33
from typing import Union, Literal, Optional
4+
from pathlib import Path
45

56
import fairseq
67
import faiss
@@ -10,7 +11,7 @@
1011
import torch.nn.functional as F
1112
from torchaudio.transforms import Resample
1213

13-
from rvc.f0 import PM, Harvest, RMVPE, CRePE, Dio, FCPE
14+
from rvc.f0 import Generator
1415
from rvc.synthesizer import load_synthesizer
1516

1617

@@ -65,14 +66,7 @@ def forward_dml(ctx, x, scale):
6566

6667
self.resample_kernel = {}
6768

68-
self.f0_methods = {
69-
"crepe": self._get_f0_crepe,
70-
"rmvpe": self._get_f0_rmvpe,
71-
"fcpe": self._get_f0_fcpe,
72-
"pm": self._get_f0_pm,
73-
"harvest": self._get_f0_harvest,
74-
"dio": self._get_f0_dio,
75-
}
69+
self.f0_gen = Generator(Path(os.environ["rmvpe_root"]), is_half, 0, device, self.window, self.sr)
7670

7771
models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
7872
["assets/hubert/hubert_base.pt"],
@@ -141,7 +135,6 @@ def infer(
141135
skip_head: int,
142136
return_length: int,
143137
f0method: Union[tuple, str],
144-
inp_f0: Optional[np.ndarray] = None,
145138
protect: float = 1.0,
146139
) -> np.ndarray:
147140
with torch.no_grad():
@@ -205,16 +198,11 @@ def infer(
205198
f0_extractor_frame = (
206199
5120 * ((f0_extractor_frame - 1) // 5120 + 1) - self.window
207200
)
208-
if inp_f0 is not None:
209-
pitch, pitchf = self._get_f0_post(
210-
inp_f0, self.f0_up_key - self.formant_shift
211-
)
212-
else:
213-
pitch, pitchf = self._get_f0(
214-
input_wav[-f0_extractor_frame:],
215-
self.f0_up_key - self.formant_shift,
216-
method=f0method,
217-
)
201+
pitch, pitchf = self._get_f0(
202+
input_wav[-f0_extractor_frame:],
203+
self.f0_up_key - self.formant_shift,
204+
method=f0method,
205+
)
218206
shift = block_frame_16k // self.window
219207
self.cache_pitch[:-shift] = self.cache_pitch[shift:].clone()
220208
self.cache_pitchf[:-shift] = self.cache_pitchf[shift:].clone()
@@ -275,89 +263,9 @@ def _get_f0(
275263
filter_radius: Optional[Union[int, float]] = None,
276264
method: Literal["crepe", "rmvpe", "fcpe", "pm", "harvest", "dio"] = "fcpe",
277265
):
278-
if method not in self.f0_methods.keys():
279-
raise RuntimeError("Not supported f0 method: " + method)
280-
return self.f0_methods[method](x, f0_up_key, filter_radius)
281-
282-
def _get_f0_post(self, f0, f0_up_key):
283-
f0 *= pow(2, f0_up_key / 12)
284-
if not torch.is_tensor(f0):
285-
f0 = torch.from_numpy(f0)
286-
f0 = f0.float().to(self.device).squeeze()
287-
f0_mel = 1127 * torch.log(1 + f0 / 700)
288-
f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * 254 / (
289-
self.f0_mel_max - self.f0_mel_min
290-
) + 1
291-
f0_mel[f0_mel <= 1] = 1
292-
f0_mel[f0_mel > 255] = 255
293-
f0_coarse = torch.round(f0_mel).long()
294-
return f0_coarse, f0
295-
296-
def _get_f0_pm(self, x, f0_up_key, filter_radius):
297-
if not hasattr(self, "pm"):
298-
self.pm = PM(hop_length=160, sampling_rate=16000)
299-
f0 = self.pm.compute_f0(x.cpu().numpy())
300-
return self._get_f0_post(f0, f0_up_key)
301-
302-
def _get_f0_harvest(self, x, f0_up_key, filter_radius=3):
303-
if not hasattr(self, "harvest"):
304-
self.harvest = Harvest(
305-
self.window,
306-
self.f0_min,
307-
self.f0_max,
308-
self.sr,
309-
)
310-
if filter_radius is None:
311-
filter_radius = 3
312-
f0 = self.harvest.compute_f0(x.cpu().numpy(), filter_radius=filter_radius)
313-
return self._get_f0_post(f0, f0_up_key)
314-
315-
def _get_f0_dio(self, x, f0_up_key, filter_radius):
316-
if not hasattr(self, "dio"):
317-
self.dio = Dio(
318-
self.window,
319-
self.f0_min,
320-
self.f0_max,
321-
self.sr,
322-
)
323-
f0 = self.dio.compute_f0(x.cpu().numpy())
324-
return self._get_f0_post(f0, f0_up_key)
325-
326-
def _get_f0_crepe(self, x, f0_up_key, filter_radius):
327-
if hasattr(self, "crepe") == False:
328-
self.crepe = CRePE(
329-
self.window,
330-
self.f0_min,
331-
self.f0_max,
332-
self.sr,
333-
self.device,
334-
)
335-
f0 = self.crepe.compute_f0(x)
336-
return self._get_f0_post(f0, f0_up_key)
337-
338-
def _get_f0_rmvpe(self, x, f0_up_key, filter_radius=0.03):
339-
if hasattr(self, "rmvpe") == False:
340-
self.rmvpe = RMVPE(
341-
"%s/rmvpe.pt" % os.environ["rmvpe_root"],
342-
is_half=self.is_half,
343-
device=self.device,
344-
use_jit=self.use_jit,
345-
)
346-
if filter_radius is None:
347-
filter_radius = 0.03
348-
return self._get_f0_post(
349-
self.rmvpe.compute_f0(x, filter_radius=filter_radius),
350-
f0_up_key,
351-
)
352-
353-
def _get_f0_fcpe(self, x, f0_up_key, filter_radius):
354-
if hasattr(self, "fcpe") == False:
355-
self.fcpe = FCPE(
356-
160,
357-
self.f0_min,
358-
self.f0_max,
359-
16000,
360-
self.device,
361-
)
362-
f0 = self.fcpe.compute_f0(x)
363-
return self._get_f0_post(f0, f0_up_key)
266+
c, f = self.f0_gen.calculate(x, None, f0_up_key, method, filter_radius)
267+
if not torch.is_tensor(c):
268+
c = torch.from_numpy(c)
269+
if not torch.is_tensor(f):
270+
f = torch.from_numpy(f)
271+
return c.long().to(self.device), f.float().to(self.device)

rvc/f0/gen.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,12 @@
44

55
from numba import jit
66
import numpy as np
7+
import torch
78

89

910
@jit(nopython=True)
1011
def post_process(
11-
sr: int,
12-
window: int,
12+
tf0: int, # 每秒f0点数
1313
f0: np.ndarray,
1414
f0_up_key: int,
1515
manual_x_pad: int,
@@ -19,7 +19,6 @@ def post_process(
1919
) -> Tuple[np.ndarray, np.ndarray]:
2020
f0 = np.multiply(f0, pow(2, f0_up_key / 12))
2121
# with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
22-
tf0 = sr // window # 每秒f0点数
2322
if manual_f0 is not None:
2423
delta_t = np.round(
2524
(manual_f0[:, 0].max() - manual_f0[:, 0].min()) * tf0 + 1
@@ -62,12 +61,14 @@ def __init__(
6261
def calculate(
6362
self,
6463
x: np.ndarray,
65-
p_len: int,
64+
p_len: Optional[int],
6665
f0_up_key: int,
6766
f0_method: Literal["pm", "dio", "harvest", "crepe", "rmvpe", "fcpe"],
6867
filter_radius: Optional[Union[int, float]],
6968
manual_f0: Optional[Union[np.ndarray, list]] = None,
7069
) -> Tuple[np.ndarray, np.ndarray]:
70+
if torch.is_tensor(x):
71+
x = x.cpu().numpy()
7172
f0_min = 50
7273
f0_max = 1100
7374
if f0_method == "pm":
@@ -130,8 +131,7 @@ def calculate(
130131
raise ValueError(f"f0 method {f0_method} has not yet been supported")
131132

132133
return post_process(
133-
self.sr,
134-
self.window,
134+
self.sr // self.window,
135135
f0,
136136
f0_up_key,
137137
self.x_pad,

rvc/synthesizer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def load_synthesizer(
3131
pth_path: torch.serialization.FILE_LIKE, device=torch.device("cpu")
3232
):
3333
return get_synthesizer(
34-
torch.load(pth_path, map_location=torch.device("cpu")),
34+
torch.load(pth_path, map_location=torch.device("cpu"), weights_only=True),
3535
device,
3636
)
3737

web.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -964,9 +964,7 @@ def change_info_(ckpt_path):
964964
"Select the pitch extraction algorithm ('pm': faster extraction but lower-quality speech; 'harvest': better bass but extremely slow; 'crepe': better quality but GPU intensive), 'rmvpe': best quality, and little GPU requirement"
965965
),
966966
choices=(
967-
["pm", "harvest", "crepe", "rmvpe"]
968-
if config.dml == False
969-
else ["pm", "harvest", "rmvpe"]
967+
["pm", "dio", "harvest", "crepe", "rmvpe", "fcpe"]
970968
),
971969
value="rmvpe",
972970
interactive=True,
@@ -1209,7 +1207,7 @@ def change_info_(ckpt_path):
12091207
label=i18n(
12101208
"Select the pitch extraction algorithm: when extracting singing, you can use 'pm' to speed up. For high-quality speech with fast performance, but worse CPU usage, you can use 'dio'. 'harvest' results in better quality but is slower. 'rmvpe' has the best results and consumes less CPU/GPU"
12111209
),
1212-
choices=["pm", "harvest", "dio", "rmvpe"],
1210+
choices=["pm", "dio", "harvest", "crepe", "rmvpe", "fcpe"],
12131211
value="rmvpe",
12141212
interactive=True,
12151213
)

0 commit comments

Comments
 (0)