|
1 | 1 | from io import BytesIO |
2 | 2 | import os |
3 | 3 | from typing import Union, Literal, Optional |
| 4 | +from pathlib import Path |
4 | 5 |
|
5 | 6 | import fairseq |
6 | 7 | import faiss |
|
10 | 11 | import torch.nn.functional as F |
11 | 12 | from torchaudio.transforms import Resample |
12 | 13 |
|
13 | | -from rvc.f0 import PM, Harvest, RMVPE, CRePE, Dio, FCPE |
| 14 | +from rvc.f0 import Generator |
14 | 15 | from rvc.synthesizer import load_synthesizer |
15 | 16 |
|
16 | 17 |
|
@@ -65,14 +66,7 @@ def forward_dml(ctx, x, scale): |
65 | 66 |
|
66 | 67 | self.resample_kernel = {} |
67 | 68 |
|
68 | | - self.f0_methods = { |
69 | | - "crepe": self._get_f0_crepe, |
70 | | - "rmvpe": self._get_f0_rmvpe, |
71 | | - "fcpe": self._get_f0_fcpe, |
72 | | - "pm": self._get_f0_pm, |
73 | | - "harvest": self._get_f0_harvest, |
74 | | - "dio": self._get_f0_dio, |
75 | | - } |
| 69 | + self.f0_gen = Generator(Path(os.environ["rmvpe_root"]), is_half, 0, device, self.window, self.sr) |
76 | 70 |
|
77 | 71 | models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task( |
78 | 72 | ["assets/hubert/hubert_base.pt"], |
@@ -141,7 +135,6 @@ def infer( |
141 | 135 | skip_head: int, |
142 | 136 | return_length: int, |
143 | 137 | f0method: Union[tuple, str], |
144 | | - inp_f0: Optional[np.ndarray] = None, |
145 | 138 | protect: float = 1.0, |
146 | 139 | ) -> np.ndarray: |
147 | 140 | with torch.no_grad(): |
@@ -205,16 +198,11 @@ def infer( |
205 | 198 | f0_extractor_frame = ( |
206 | 199 | 5120 * ((f0_extractor_frame - 1) // 5120 + 1) - self.window |
207 | 200 | ) |
208 | | - if inp_f0 is not None: |
209 | | - pitch, pitchf = self._get_f0_post( |
210 | | - inp_f0, self.f0_up_key - self.formant_shift |
211 | | - ) |
212 | | - else: |
213 | | - pitch, pitchf = self._get_f0( |
214 | | - input_wav[-f0_extractor_frame:], |
215 | | - self.f0_up_key - self.formant_shift, |
216 | | - method=f0method, |
217 | | - ) |
| 201 | + pitch, pitchf = self._get_f0( |
| 202 | + input_wav[-f0_extractor_frame:], |
| 203 | + self.f0_up_key - self.formant_shift, |
| 204 | + method=f0method, |
| 205 | + ) |
218 | 206 | shift = block_frame_16k // self.window |
219 | 207 | self.cache_pitch[:-shift] = self.cache_pitch[shift:].clone() |
220 | 208 | self.cache_pitchf[:-shift] = self.cache_pitchf[shift:].clone() |
@@ -275,89 +263,9 @@ def _get_f0( |
275 | 263 | filter_radius: Optional[Union[int, float]] = None, |
276 | 264 | method: Literal["crepe", "rmvpe", "fcpe", "pm", "harvest", "dio"] = "fcpe", |
277 | 265 | ): |
278 | | - if method not in self.f0_methods.keys(): |
279 | | - raise RuntimeError("Not supported f0 method: " + method) |
280 | | - return self.f0_methods[method](x, f0_up_key, filter_radius) |
281 | | - |
282 | | - def _get_f0_post(self, f0, f0_up_key): |
283 | | - f0 *= pow(2, f0_up_key / 12) |
284 | | - if not torch.is_tensor(f0): |
285 | | - f0 = torch.from_numpy(f0) |
286 | | - f0 = f0.float().to(self.device).squeeze() |
287 | | - f0_mel = 1127 * torch.log(1 + f0 / 700) |
288 | | - f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * 254 / ( |
289 | | - self.f0_mel_max - self.f0_mel_min |
290 | | - ) + 1 |
291 | | - f0_mel[f0_mel <= 1] = 1 |
292 | | - f0_mel[f0_mel > 255] = 255 |
293 | | - f0_coarse = torch.round(f0_mel).long() |
294 | | - return f0_coarse, f0 |
295 | | - |
296 | | - def _get_f0_pm(self, x, f0_up_key, filter_radius): |
297 | | - if not hasattr(self, "pm"): |
298 | | - self.pm = PM(hop_length=160, sampling_rate=16000) |
299 | | - f0 = self.pm.compute_f0(x.cpu().numpy()) |
300 | | - return self._get_f0_post(f0, f0_up_key) |
301 | | - |
302 | | - def _get_f0_harvest(self, x, f0_up_key, filter_radius=3): |
303 | | - if not hasattr(self, "harvest"): |
304 | | - self.harvest = Harvest( |
305 | | - self.window, |
306 | | - self.f0_min, |
307 | | - self.f0_max, |
308 | | - self.sr, |
309 | | - ) |
310 | | - if filter_radius is None: |
311 | | - filter_radius = 3 |
312 | | - f0 = self.harvest.compute_f0(x.cpu().numpy(), filter_radius=filter_radius) |
313 | | - return self._get_f0_post(f0, f0_up_key) |
314 | | - |
315 | | - def _get_f0_dio(self, x, f0_up_key, filter_radius): |
316 | | - if not hasattr(self, "dio"): |
317 | | - self.dio = Dio( |
318 | | - self.window, |
319 | | - self.f0_min, |
320 | | - self.f0_max, |
321 | | - self.sr, |
322 | | - ) |
323 | | - f0 = self.dio.compute_f0(x.cpu().numpy()) |
324 | | - return self._get_f0_post(f0, f0_up_key) |
325 | | - |
326 | | - def _get_f0_crepe(self, x, f0_up_key, filter_radius): |
327 | | - if hasattr(self, "crepe") == False: |
328 | | - self.crepe = CRePE( |
329 | | - self.window, |
330 | | - self.f0_min, |
331 | | - self.f0_max, |
332 | | - self.sr, |
333 | | - self.device, |
334 | | - ) |
335 | | - f0 = self.crepe.compute_f0(x) |
336 | | - return self._get_f0_post(f0, f0_up_key) |
337 | | - |
338 | | - def _get_f0_rmvpe(self, x, f0_up_key, filter_radius=0.03): |
339 | | - if hasattr(self, "rmvpe") == False: |
340 | | - self.rmvpe = RMVPE( |
341 | | - "%s/rmvpe.pt" % os.environ["rmvpe_root"], |
342 | | - is_half=self.is_half, |
343 | | - device=self.device, |
344 | | - use_jit=self.use_jit, |
345 | | - ) |
346 | | - if filter_radius is None: |
347 | | - filter_radius = 0.03 |
348 | | - return self._get_f0_post( |
349 | | - self.rmvpe.compute_f0(x, filter_radius=filter_radius), |
350 | | - f0_up_key, |
351 | | - ) |
352 | | - |
353 | | - def _get_f0_fcpe(self, x, f0_up_key, filter_radius): |
354 | | - if hasattr(self, "fcpe") == False: |
355 | | - self.fcpe = FCPE( |
356 | | - 160, |
357 | | - self.f0_min, |
358 | | - self.f0_max, |
359 | | - 16000, |
360 | | - self.device, |
361 | | - ) |
362 | | - f0 = self.fcpe.compute_f0(x) |
363 | | - return self._get_f0_post(f0, f0_up_key) |
| 266 | + c, f = self.f0_gen.calculate(x, None, f0_up_key, method, filter_radius) |
| 267 | + if not torch.is_tensor(c): |
| 268 | + c = torch.from_numpy(c) |
| 269 | + if not torch.is_tensor(f): |
| 270 | + f = torch.from_numpy(f) |
| 271 | + return c.long().to(self.device), f.float().to(self.device) |
0 commit comments