Commit 3b7d7c6

optimize(f0): move fcpe into rvc.f0

1 parent 24dbc5e commit 3b7d7c6

File tree

6 files changed (+70 -32 lines changed):

  infer/lib/rtrvc.py
  infer/modules/vc/pipeline.py
  rvc/f0/__init__.py
  rvc/f0/crepe.py
  rvc/f0/fcpe.py
  web.py

infer/lib/rtrvc.py

Lines changed: 9 additions & 13 deletions
@@ -13,7 +13,6 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-import torchcrepe
 from torchaudio.transforms import Resample
 
 from rvc.synthesizer import load_synthesizer
@@ -323,20 +322,17 @@ def get_f0_rmvpe(self, x, f0_up_key):
 
     def get_f0_fcpe(self, x, f0_up_key):
         if hasattr(self, "model_fcpe") == False:
-            from torchfcpe import spawn_bundled_infer_model
+            from rvc.f0 import FCPE
 
             printt("Loading fcpe model")
-            if "privateuseone" in str(self.device):
-                self.device_fcpe = "cpu"
-            else:
-                self.device_fcpe = self.device
-            self.model_fcpe = spawn_bundled_infer_model(self.device_fcpe)
-        f0 = self.model_fcpe.infer(
-            x.to(self.device_fcpe).unsqueeze(0).float(),
-            sr=16000,
-            decoder_mode="local_argmax",
-            threshold=0.006,
-        )
+            self.model_fcpe = FCPE(
+                160,
+                self.f0_min,
+                self.f0_max,
+                16000,
+                self.device,
+            )
+        f0 = self.model_fcpe.compute_f0(x)
         f0 *= pow(2, f0_up_key / 12)
         return self.get_f0_post(f0)

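For reference, a minimal standalone sketch of the call pattern the real-time client now uses, assuming rvc.f0.FCPE is available as added in this commit; the f0 bounds, device string, and input buffer below are illustrative placeholders:

import torch

from rvc.f0 import FCPE

# 160-sample hop at 16 kHz mirrors the values used in get_f0_fcpe above;
# the f0 bounds and device string are placeholders for this sketch.
fcpe = FCPE(160, 50, 1100, 16000, "cuda" if torch.cuda.is_available() else "cpu")

x = torch.zeros(16000)   # one second of silence standing in for the capture buffer
f0 = fcpe.compute_f0(x)  # per-frame f0 curve
f0 *= pow(2, 12 / 12)    # transpose by +12 semitones, as done with f0_up_key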
infer/modules/vc/pipeline.py

Lines changed: 8 additions & 14 deletions
@@ -14,7 +14,7 @@
 import torch.nn.functional as F
 from scipy import signal
 
-from rvc.f0 import PM, Harvest, RMVPE, CRePE, Dio
+from rvc.f0 import PM, Harvest, RMVPE, CRePE, Dio, FCPE
 
 now_dir = os.getcwd()
 sys.path.append(now_dir)
@@ -118,21 +118,15 @@ def get_f0(
 
         elif f0_method == "fcpe":
             if not hasattr(self, "model_fcpe"):
-                from torchfcpe import spawn_bundled_infer_model
-
                 logger.info("Loading fcpe model")
-                self.model_fcpe = spawn_bundled_infer_model(self.device)
-            f0 = (
-                self.model_fcpe.infer(
-                    torch.from_numpy(x).to(self.device).unsqueeze(0).float(),
-                    sr=16000,
-                    decoder_mode="local_argmax",
-                    threshold=0.006,
+                self.model_fcpe = FCPE(
+                    self.window,
+                    f0_min,
+                    f0_max,
+                    self.sr,
+                    self.device,
                 )
-                .squeeze()
-                .cpu()
-                .numpy()
-            )
+            f0 = self.model_fcpe.compute_f0(x, p_len=p_len)
 
         f0 *= pow(2, f0_up_key / 12)
         # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))

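The offline pipeline passes its own hop (self.window), sample rate, and the expected frame count p_len, so resizing happens inside the predictor instead of in the caller. A rough equivalent under the same assumptions, with placeholder values for the window, sample rate, and f0 bounds:

import numpy as np

from rvc.f0 import FCPE

sr, window = 16000, 160                  # placeholders for self.sr / self.window
x = np.zeros(10 * sr, dtype=np.float32)  # ten seconds of silence as a stand-in signal
p_len = x.shape[0] // window             # frame count the rest of the pipeline expects

fcpe = FCPE(window, 50, 1100, sr, "cpu")
f0 = fcpe.compute_f0(x, p_len=p_len)     # numpy input is converted to a tensor internally
# the returned curve is resized and interpolated to p_len frames by the F0Predictor base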
rvc/f0/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -2,8 +2,9 @@
 
 from .crepe import CRePE
 from .dio import Dio
+from .fcpe import FCPE
 from .harvest import Harvest
 from .pm import PM
 from .rmvpe import RMVPE
 
-__all__ = ["F0Predictor", "CRePE", "Dio", "Harvest", "PM", "RMVPE"]
+__all__ = ["F0Predictor", "CRePE", "Dio", "FCPE", "Harvest", "PM", "RMVPE"]

rvc/f0/crepe.py

Lines changed: 5 additions & 1 deletion
@@ -16,6 +16,8 @@ def __init__(
         sampling_rate=44100,
         device="cpu",
     ):
+        if "privateuseone" in str(device):
+            device = "cpu"
         super().__init__(
             hop_length,
             f0_min,
@@ -32,11 +34,13 @@ def compute_f0(
     ):
         if p_len is None:
             p_len = wav.shape[0] // self.hop_length
+        if not torch.is_tensor(wav):
+            wav = torch.from_numpy(wav)
         # Pick a batch size that doesn't cause memory errors on your gpu
         batch_size = 512
         # Compute pitch using device 'device'
         f0, pd = torchcrepe.predict(
-            torch.tensor(np.copy(wav))[None].float().to(self.device),
+            wav.float().to(self.device).unsqueeze(dim=0),
             self.sampling_rate,
             self.hop_length,
             self.f0_min,

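The CRePE predictor gets two companion tweaks: DirectML ("privateuseone") devices fall back to CPU in the constructor, and compute_f0 now accepts either a numpy array or a torch tensor. A small illustrative sketch, assuming CRePE takes the same positional arguments as the other predictors (hop, f0_min, f0_max, sample rate, device; values below are placeholders):

import numpy as np
import torch

from rvc.f0 import CRePE

crepe = CRePE(160, 50, 1100, 16000, "cpu")          # placeholder constructor values

wav_np = np.zeros(16000, dtype=np.float32)
f0_a = crepe.compute_f0(wav_np)                      # ndarray path: converted via torch.from_numpy
f0_b = crepe.compute_f0(torch.from_numpy(wav_np))    # tensor path: used as-is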
rvc/f0/fcpe.py

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+from typing import Any, Optional, Union
+
+import numpy as np
+import torch
+from torchfcpe import spawn_bundled_infer_model
+
+from .f0 import F0Predictor
+
+
+class FCPE(F0Predictor):
+    def __init__(
+        self,
+        hop_length=512,
+        f0_min=50,
+        f0_max=1100,
+        sampling_rate=44100,
+        device="cpu",
+    ):
+        super().__init__(
+            hop_length,
+            f0_min,
+            f0_max,
+            sampling_rate,
+            device,
+        )
+
+        self.model = spawn_bundled_infer_model(self.device)
+
+    def compute_f0(
+        self,
+        wav: np.ndarray[Any, np.dtype],
+        p_len: Optional[int] = None,
+        filter_radius: Optional[Union[int, float]] = 0.006,
+    ):
+        if p_len is None:
+            p_len = wav.shape[0] // self.hop_length
+        if not torch.is_tensor(wav):
+            wav = torch.from_numpy(wav)
+        f0 = self.model.infer(
+            wav.float().to(self.device).unsqueeze(0),
+            sr=self.sampling_rate,
+            decoder_mode="local_argmax",
+            threshold=filter_radius,
+        ).squeeze().cpu().numpy()
+        return self._interpolate_f0(self._resize_f0(f0, p_len))[0]

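The new wrapper keeps the torchfcpe defaults previously inlined at the call sites: decoder_mode="local_argmax" and a 0.006 confidence threshold, with the threshold exposed through the generic filter_radius parameter. A short usage sketch using the keyword names from the class above (hop, bounds, and sample rate are placeholders):

import numpy as np

from rvc.f0 import FCPE

fcpe = FCPE(hop_length=160, f0_min=50, f0_max=1100, sampling_rate=16000, device="cpu")
wav = np.zeros(16000, dtype=np.float32)
# filter_radius is forwarded to torchfcpe as its confidence threshold; 0.006 is the default
f0 = fcpe.compute_f0(wav, filter_radius=0.006)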
web.py

Lines changed: 1 addition & 3 deletions
@@ -861,9 +861,7 @@ def change_f0_method(f0method8):
                        "Select the pitch extraction algorithm ('pm': faster extraction but lower-quality speech; 'harvest': better bass but extremely slow; 'crepe': better quality but GPU intensive), 'rmvpe': best quality, and little GPU requirement"
                    ),
                    choices=(
-                        ["pm", "dio", "harvest", "rmvpe"]
-                        if config.dml
-                        else ["pm", "dio", "harvest", "crepe", "rmvpe"]
+                        ["pm", "dio", "harvest", "crepe", "rmvpe", "fcpe"]
                    ),
                    value="rmvpe",
                    interactive=True,
