|
1 | 1 | from io import BytesIO
|
2 | 2 | import os
|
3 | 3 | from typing import Union, Literal, Optional
|
| 4 | +from pathlib import Path |
4 | 5 |
|
5 | 6 | import fairseq
|
6 | 7 | import faiss
|
|
10 | 11 | import torch.nn.functional as F
|
11 | 12 | from torchaudio.transforms import Resample
|
12 | 13 |
|
13 |
| -from rvc.f0 import PM, Harvest, RMVPE, CRePE, Dio, FCPE |
| 14 | +from rvc.f0 import Generator |
14 | 15 | from rvc.synthesizer import load_synthesizer
|
15 | 16 |
|
16 | 17 |
|
@@ -65,14 +66,7 @@ def forward_dml(ctx, x, scale):
|
65 | 66 |
|
66 | 67 | self.resample_kernel = {}
|
67 | 68 |
|
68 |
| - self.f0_methods = { |
69 |
| - "crepe": self._get_f0_crepe, |
70 |
| - "rmvpe": self._get_f0_rmvpe, |
71 |
| - "fcpe": self._get_f0_fcpe, |
72 |
| - "pm": self._get_f0_pm, |
73 |
| - "harvest": self._get_f0_harvest, |
74 |
| - "dio": self._get_f0_dio, |
75 |
| - } |
| 69 | + self.f0_gen = Generator(Path(os.environ["rmvpe_root"]), is_half, 0, device, self.window, self.sr) |
76 | 70 |
|
77 | 71 | models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
|
78 | 72 | ["assets/hubert/hubert_base.pt"],
|
@@ -141,7 +135,6 @@ def infer(
|
141 | 135 | skip_head: int,
|
142 | 136 | return_length: int,
|
143 | 137 | f0method: Union[tuple, str],
|
144 |
| - inp_f0: Optional[np.ndarray] = None, |
145 | 138 | protect: float = 1.0,
|
146 | 139 | ) -> np.ndarray:
|
147 | 140 | with torch.no_grad():
|
@@ -205,16 +198,11 @@ def infer(
|
205 | 198 | f0_extractor_frame = (
|
206 | 199 | 5120 * ((f0_extractor_frame - 1) // 5120 + 1) - self.window
|
207 | 200 | )
|
208 |
| - if inp_f0 is not None: |
209 |
| - pitch, pitchf = self._get_f0_post( |
210 |
| - inp_f0, self.f0_up_key - self.formant_shift |
211 |
| - ) |
212 |
| - else: |
213 |
| - pitch, pitchf = self._get_f0( |
214 |
| - input_wav[-f0_extractor_frame:], |
215 |
| - self.f0_up_key - self.formant_shift, |
216 |
| - method=f0method, |
217 |
| - ) |
| 201 | + pitch, pitchf = self._get_f0( |
| 202 | + input_wav[-f0_extractor_frame:], |
| 203 | + self.f0_up_key - self.formant_shift, |
| 204 | + method=f0method, |
| 205 | + ) |
218 | 206 | shift = block_frame_16k // self.window
|
219 | 207 | self.cache_pitch[:-shift] = self.cache_pitch[shift:].clone()
|
220 | 208 | self.cache_pitchf[:-shift] = self.cache_pitchf[shift:].clone()
|
@@ -275,89 +263,9 @@ def _get_f0(
|
275 | 263 | filter_radius: Optional[Union[int, float]] = None,
|
276 | 264 | method: Literal["crepe", "rmvpe", "fcpe", "pm", "harvest", "dio"] = "fcpe",
|
277 | 265 | ):
|
278 |
| - if method not in self.f0_methods.keys(): |
279 |
| - raise RuntimeError("Not supported f0 method: " + method) |
280 |
| - return self.f0_methods[method](x, f0_up_key, filter_radius) |
281 |
| - |
282 |
| - def _get_f0_post(self, f0, f0_up_key): |
283 |
| - f0 *= pow(2, f0_up_key / 12) |
284 |
| - if not torch.is_tensor(f0): |
285 |
| - f0 = torch.from_numpy(f0) |
286 |
| - f0 = f0.float().to(self.device).squeeze() |
287 |
| - f0_mel = 1127 * torch.log(1 + f0 / 700) |
288 |
| - f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * 254 / ( |
289 |
| - self.f0_mel_max - self.f0_mel_min |
290 |
| - ) + 1 |
291 |
| - f0_mel[f0_mel <= 1] = 1 |
292 |
| - f0_mel[f0_mel > 255] = 255 |
293 |
| - f0_coarse = torch.round(f0_mel).long() |
294 |
| - return f0_coarse, f0 |
295 |
| - |
296 |
| - def _get_f0_pm(self, x, f0_up_key, filter_radius): |
297 |
| - if not hasattr(self, "pm"): |
298 |
| - self.pm = PM(hop_length=160, sampling_rate=16000) |
299 |
| - f0 = self.pm.compute_f0(x.cpu().numpy()) |
300 |
| - return self._get_f0_post(f0, f0_up_key) |
301 |
| - |
302 |
| - def _get_f0_harvest(self, x, f0_up_key, filter_radius=3): |
303 |
| - if not hasattr(self, "harvest"): |
304 |
| - self.harvest = Harvest( |
305 |
| - self.window, |
306 |
| - self.f0_min, |
307 |
| - self.f0_max, |
308 |
| - self.sr, |
309 |
| - ) |
310 |
| - if filter_radius is None: |
311 |
| - filter_radius = 3 |
312 |
| - f0 = self.harvest.compute_f0(x.cpu().numpy(), filter_radius=filter_radius) |
313 |
| - return self._get_f0_post(f0, f0_up_key) |
314 |
| - |
315 |
| - def _get_f0_dio(self, x, f0_up_key, filter_radius): |
316 |
| - if not hasattr(self, "dio"): |
317 |
| - self.dio = Dio( |
318 |
| - self.window, |
319 |
| - self.f0_min, |
320 |
| - self.f0_max, |
321 |
| - self.sr, |
322 |
| - ) |
323 |
| - f0 = self.dio.compute_f0(x.cpu().numpy()) |
324 |
| - return self._get_f0_post(f0, f0_up_key) |
325 |
| - |
326 |
| - def _get_f0_crepe(self, x, f0_up_key, filter_radius): |
327 |
| - if hasattr(self, "crepe") == False: |
328 |
| - self.crepe = CRePE( |
329 |
| - self.window, |
330 |
| - self.f0_min, |
331 |
| - self.f0_max, |
332 |
| - self.sr, |
333 |
| - self.device, |
334 |
| - ) |
335 |
| - f0 = self.crepe.compute_f0(x) |
336 |
| - return self._get_f0_post(f0, f0_up_key) |
337 |
| - |
338 |
| - def _get_f0_rmvpe(self, x, f0_up_key, filter_radius=0.03): |
339 |
| - if hasattr(self, "rmvpe") == False: |
340 |
| - self.rmvpe = RMVPE( |
341 |
| - "%s/rmvpe.pt" % os.environ["rmvpe_root"], |
342 |
| - is_half=self.is_half, |
343 |
| - device=self.device, |
344 |
| - use_jit=self.use_jit, |
345 |
| - ) |
346 |
| - if filter_radius is None: |
347 |
| - filter_radius = 0.03 |
348 |
| - return self._get_f0_post( |
349 |
| - self.rmvpe.compute_f0(x, filter_radius=filter_radius), |
350 |
| - f0_up_key, |
351 |
| - ) |
352 |
| - |
353 |
| - def _get_f0_fcpe(self, x, f0_up_key, filter_radius): |
354 |
| - if hasattr(self, "fcpe") == False: |
355 |
| - self.fcpe = FCPE( |
356 |
| - 160, |
357 |
| - self.f0_min, |
358 |
| - self.f0_max, |
359 |
| - 16000, |
360 |
| - self.device, |
361 |
| - ) |
362 |
| - f0 = self.fcpe.compute_f0(x) |
363 |
| - return self._get_f0_post(f0, f0_up_key) |
| 266 | + c, f = self.f0_gen.calculate(x, None, f0_up_key, method, filter_radius) |
| 267 | + if not torch.is_tensor(c): |
| 268 | + c = torch.from_numpy(c) |
| 269 | + if not torch.is_tensor(f): |
| 270 | + f = torch.from_numpy(f) |
| 271 | + return c.long().to(self.device), f.float().to(self.device) |
0 commit comments